library(dplyr)
library(stringr)
library(lubridate)
library(Matrix)
library(igraph)
library(bipartite)
library(data.table)
library(ggplot2)
library(gganimate)
library(tidyr)
library(viridis)
library(asnipe)
library(PupillometryR)
library(cowplot)
library(DescTools)
set.seed(1234) #fix RNG state so any stochastic steps (e.g. layout jitter) are reproducible
#set colors: one palette per comparison used in the figures
col_year<-viridis(n=2, option="mako", begin=0.25) #2 levels: 2019 vs 2020
col_sfeeder<-viridis(n=2, option="turbo", begin=0.3) #2 levels: superfeeder vs non-superfeeder
col_sites<-viridis(n=8) #8 levels: antenna sites 1-8 (2020 season)
#For 2020 data, Boat Ramp = Site 1 and Crown College = Site 3.
setwd("~/Carp/2019/")
#load and clean Boat Ramp and College Crown Site data
#Capture records: one row per PIT-tagged fish (lake, tag ID, length, sex)
pit_capture <-read.csv("~/Carp/2019/test_2019_01_21.csv", stringsAsFactors = FALSE, colClasses = "character")
pit_capture<- select(pit_capture, c(Lake, PIT, Length, Sex))
pit_capture$PIT<-as.character(pit_capture$PIT)
#Boat Ramp site: raw antenna log, whitespace-delimited with a fixed column layout
boatramp<-read.table("~/Carp/2019/Boat_ramp_complete.log", header=FALSE, fill=TRUE,col.names=c("D", "Date", "Time", "Time2", "HA", "PIT", "Antenna", "Col1","Col2"), na.strings=c("", "NA"))
boatramp<-select(boatramp, c(Date, Time, PIT, Antenna))
boatramp$DATETIME<- as.POSIXct(paste(boatramp$Date,boatramp$Time), format='%Y-%m-%d %H:%M:%S', tz="CDT") #Reformat as POSIXct
#NOTE: logical subsetting (!is.na) replaces -which(is.na(...)): when which()
#matches nothing, x[-integer(0),] silently drops EVERY row of the frame.
boatramp<-boatramp[!is.na(boatramp$DATETIME),] #remove rows whose timestamp failed to parse
boatramp$PIT<-as.character(boatramp$PIT) #toString(boatramp$PIT)
boatramp$PIT<-str_sub(boatramp$PIT, 11, 16) #keep characters 11-16 of the raw reader string (short tag code)
boatramp$PIT[boatramp$PIT==""] <- NA
boatramp<-boatramp[!is.na(boatramp$PIT),]
boatramp<-boatramp[boatramp$PIT!="390146",] #This PIT ID has a 2018 read (prior to data collection start)
boatramp$Site<-"B"
# write.csv( boatramp, "cleaned_boatramp.csv")
#inner join: keeps only detections of fish present in the capture/tagging records
merged_br<- merge(boatramp, pit_capture, by="PIT")
#Crown Site: same layout as the Boat Ramp log, but stored as .csv
crown<-read.table("~/Carp/2019/crown_complete.csv", header=FALSE, fill=TRUE,col.names=c("D", "Date", "Time", "Time2", "HA", "PIT", "Antenna", "Col1","Col2"), na.strings=c("", "NA"), stringsAsFactors = FALSE)
crown<-select(crown, c(Date, Time, PIT, Antenna))
#logical masks replace -which(...): x[-integer(0),] drops all rows when nothing matches
crown<-crown[!is.na(crown$Antenna),]
crown<-crown[crown$Antenna %in% c("A1", "A2", "A3"),] #keep only the three real antennas
crown$Antenna<-as.character(crown$Antenna)
crown$DATETIME<- as.POSIXct(paste(as.character(crown$Date),as.character(crown$Time)), format='%Y-%m-%d %H:%M:%S', tz="CDT") #Reformat as POSIXct
crown<-crown[!is.na(crown$DATETIME),] #remove rows whose timestamp failed to parse
crown$PIT<-as.character(crown$PIT) #toString(crown$PIT)
crown$PIT<-str_sub(crown$PIT, 11, 16) #keep characters 11-16 of the raw reader string (short tag code)
crown$PIT[crown$PIT==""] <- NA
# crown<-crown[-which(is.na(crown$PIT)),]
crown$Site<-"C"
#compare against an explicit POSIXct (a bare string comparand relies on implicit
#coercion and silently ignores the trailing "CDT")
crown<-crown[crown$DATETIME>as.POSIXct("2019-01-01 12:00:00", tz="CDT"),] #drop reads before the study window
crown<-na.omit(crown)
crown<-crown[!grepl("[[:alpha:]]", crown$PIT),] #IDs containing letters are corrupt reads
crown<-crown[crown$PIT!="000000",] #remove a few more suspect IDs
crown<-crown[crown$PIT!="75241",]
#inner join: keeps only detections of fish present in the capture/tagging records
merged_crown<- merge(crown, pit_capture, by="PIT")
#subdivide data based on phases of experiment
#first day of corn baiting- July 30th, 2019- corn out by 1:30 PM at Crown and 12:15 PM at Boat Ramp
#first day of net installation- August 16, 2019- 3 PM at Boat Ramp and 4 PM at Crown
#first day of capture- August 19th
#NOTE(review): "CDT" is not a tz database name; as.POSIXct may fall back to the
#system zone. Consistent with the reads above, but confirm "America/Chicago" intended.
# min(merged_br$DATETIME)
# max(merged_br$DATETIME)
#three phases per site: natural (pre-bait), corn (baiting), netting (post-net)
br_natural<- merged_br %>% filter( DATETIME < as.POSIXct("2019-07-30 12:15:00", tz="CDT"))
br_corn<- merged_br %>% filter(DATETIME >=as.POSIXct("2019-07-30 12:15:00", tz="CDT") & DATETIME < as.POSIXct("2019-08-16 15:00:00", tz="CDT"))
br_netting<- merged_br %>% filter(DATETIME > as.POSIXct("2019-08-16 15:00:00", tz="CDT"))
crown_natural<- merged_crown %>% filter( DATETIME < as.POSIXct("2019-07-30 13:30:00", tz="CDT"))
crown_corn<- merged_crown %>% filter(DATETIME >=as.POSIXct("2019-07-30 13:30:00", tz="CDT") & DATETIME < as.POSIXct("2019-08-16 16:00:00", tz="CDT"))
crown_netting<- merged_crown %>% filter(DATETIME > as.POSIXct("2019-08-16 16:00:00", tz="CDT"))
#In 2019, how many fish were captured and PIT tagged while electrofishing Parley Lake?
parley_capture<-pit_capture[which(pit_capture$Lake=="Parley"),]
length(unique(parley_capture$PIT))
## [1] 304
#In 2019, how many total detections were there in Parley?
nrow(merged_br)+nrow(merged_crown)
## [1] 157973
#Just during pre-baiting and baiting periods?
nrow(br_natural)+nrow(crown_natural)+ nrow(br_corn)+nrow(crown_corn)
## [1] 96030
#How many unique fish during pre-baiting period?
total_natural<-rbind(br_natural, crown_natural)
nrow(total_natural)
## [1] 184
length(unique(total_natural$PIT))
## [1] 11
#Just during baiting period?
nrow(br_corn)+nrow(crown_corn)
## [1] 95846
#How many unique fish during baiting period?
total_corn<-rbind(br_corn, crown_corn)
length(unique(total_corn$PIT))
## [1] 107
#How many unique fish during baiting period at BR?
length(unique(br_corn$PIT))
## [1] 70
#How many unique fish during baiting period at Crown?
length(unique(crown_corn$PIT))
## [1] 75
#how many fish appeared in both BR and Crown sites?
sum(unique(crown_corn$PIT) %in% unique(br_corn$PIT))
## [1] 38
#first detection 2019: earliest baiting-period detection timestamp for each fish
#(output column name fixed: was misspelled "DATEIME")
first_detect_2019<-total_corn %>% group_by(PIT) %>% arrange(DATETIME) %>% summarize(DATETIME=first(DATETIME))
setwd("~/Carp/2020")
#Capture data from 2019/2020: reformat capture date as POSIXct; retain PIT#, species, and capture date
#NOTE: this overwrites the 2019 pit_capture object with the combined-season file
pit_capture <-read.csv("All_PIT.csv", stringsAsFactors = FALSE, colClasses = "character")
pit_capture<- select(pit_capture, c(Lake, PIT, Length, Sex))
pit_capture$PIT<-as.character(pit_capture$PIT)
pit_capture<-pit_capture[!apply(pit_capture == "", 1, all),] #remove empty rows from initial read in
pit_capture<-pit_capture[which(pit_capture$Lake=="Parley"),]#limit to Lake Parley
head(pit_capture)
## Lake PIT Length Sex
## 2 Parley 497897 560 M
## 4 Parley 497857 720 F
## 6 Parley 497819 585 M
## 8 Parley 497887 690 M
## 10 Parley 497803 640 U
## 12 Parley 497862 740 U
length(unique(pit_capture$PIT))
## [1] 335
#Read in individual site data logs (one csv per antenna site, 2020 season)
s1 <-read.csv("SITE_1_2020.csv")
s2 <-read.csv("SITE_2_2020.csv")
s3 <-read.csv("SITE_3_2020.csv")
s4 <-read.csv("SITE_4_2020.csv")
s5 <-read.csv("SITE_5_2020.csv")
s6 <-read.csv("SITE_6_2020.csv")
s7 <-read.csv("SITE_7_2020.csv")
s8 <-read.csv("SITE_8_2020.csv")
#base rbind() has no Fill argument: the former rbind(..., Fill=NA) silently
#appended an all-NA row (previously cleaned up only by the PIT filter below)
all_sites <- rbind(s1,s2,s3,s4,s5,s6,s7,s8)
# all_sites$PIT<-str_sub(all_sites$X, -6, -1) #note that in original data set length of some abbreviated pit numbers was only five characters (e.g #38305)
all_sites<-select(all_sites, c(Date, Time, PIT, Site))
all_sites$DATETIME<- as.POSIXct(paste(all_sites$Date,all_sites$Time), format='%m/%d/%y %I:%M:%S %p', tz="CDT") #Reformat as POSIXct
all_sites$PIT<-as.character(all_sites$PIT)
all_sites$Site<-as.character(all_sites$Site)
#logical mask replaces -which(is.na(...)): -which() drops all rows when nothing matches
all_sites<-all_sites[!is.na(all_sites$PIT),]
head(all_sites)
## Date Time PIT Site DATETIME
## 1 6/24/20 1:13:51 PM 567699 1 2020-06-24 13:13:51
## 2 6/24/20 1:13:52 PM 567699 1 2020-06-24 13:13:52
## 3 6/24/20 1:14:27 PM 567699 1 2020-06-24 13:14:27
## 4 6/24/20 1:14:32 PM 567699 1 2020-06-24 13:14:32
## 5 6/24/20 1:14:32 PM 567699 1 2020-06-24 13:14:32
## 6 6/24/20 1:14:34 PM 567699 1 2020-06-24 13:14:34
#inner join (merge default all=FALSE): keeps only detections of tagged Parley fish
merged_all_sites<- merge(all_sites, pit_capture, by="PIT")
head(merged_all_sites)
## PIT Date Time Site DATETIME Lake Length Sex
## 1 38305 8/4/20 11:23:51 AM 4 2020-08-04 11:23:51 Parley 685 M
## 2 38305 8/4/20 11:23:50 AM 4 2020-08-04 11:23:50 Parley 685 M
## 3 38305 8/4/20 11:23:37 AM 4 2020-08-04 11:23:37 Parley 685 M
## 4 38305 8/4/20 11:23:49 AM 4 2020-08-04 11:23:49 Parley 685 M
## 5 38305 8/4/20 11:53:16 AM 4 2020-08-04 11:53:16 Parley 685 M
## 6 38305 8/4/20 11:53:06 AM 4 2020-08-04 11:53:06 Parley 685 M
#per-site subsets of the merged detections
merged_s1<-merged_all_sites[which(merged_all_sites$Site=="1"),]
merged_s2<-merged_all_sites[which(merged_all_sites$Site=="2"),]
merged_s3<-merged_all_sites[which(merged_all_sites$Site=="3"),]
merged_s4<-merged_all_sites[which(merged_all_sites$Site=="4"),]
merged_s5<-merged_all_sites[which(merged_all_sites$Site=="5"),]
merged_s6<-merged_all_sites[which(merged_all_sites$Site=="6"),]
merged_s7<-merged_all_sites[which(merged_all_sites$Site=="7"),]
merged_s8<-merged_all_sites[which(merged_all_sites$Site=="8"),]
#Set up corn period
#subdivide data based on phases of experiment
#corn/bait, July 3, 2020
#first net deployment and capture attempt, July 23, 2020- 10 AM
#single definition of the baiting window (was hard-coded 10 times)
corn_start<-as.POSIXct("2020-07-03 12:00:00", tz="CDT")
corn_end<-as.POSIXct("2020-07-23 12:00:00", tz="CDT")
#Helper: restrict a detection table to the baiting (corn) period window.
filter_corn_period<-function(df) filter(df, DATETIME >= corn_start & DATETIME < corn_end)
all_sites_natural<- merged_all_sites %>% filter(DATETIME < corn_start)
all_sites_corn<- filter_corn_period(merged_all_sites)
s1_corn<- filter_corn_period(merged_s1)
s2_corn<- filter_corn_period(merged_s2)
s3_corn<- filter_corn_period(merged_s3)
s4_corn<- filter_corn_period(merged_s4)
s5_corn<- filter_corn_period(merged_s5)
s6_corn<- filter_corn_period(merged_s6)
s7_corn<- filter_corn_period(merged_s7)
s8_corn<- filter_corn_period(merged_s8)
#Size range and sex summary of tagged carp
pit_capture<-pit_capture %>% mutate(Length=as.numeric(Length), Sex=as.factor(Sex))
summary(pit_capture)
## Lake PIT Length Sex
## Length:344 Length:344 Min. : 57.0 : 37
## Class :character Class :character 1st Qu.:567.0 F:102
## Mode :character Mode :character Median :660.0 M:193
## Mean :615.3 U: 12
## 3rd Qu.:705.0
## Max. :860.0
## NA's :47
sd(pit_capture$Length, na.rm=TRUE)
## [1] 131.5747
#In 2020, how many total detections were there in Parley?
nrow(all_sites)
## [1] 1184294
#Just during pre-baiting period?
nrow(all_sites_natural)
## [1] 74
length(unique(all_sites_natural$PIT))
## [1] 12
#Just during baiting period?
nrow(all_sites_corn)
## [1] 616593
#How many unique fish during baiting period?
length(unique(all_sites_corn$PIT))
## [1] 133
#2019 participation vs. 2020 participation
#how many 2019 fish also appeared in 2020 season?
sum(unique(total_corn$PIT) %in% unique(all_sites_corn$PIT))
## [1] 50
#how many 2019 fish did not reappear in 2020 season?
sum(!(unique(total_corn$PIT) %in% unique(all_sites_corn$PIT)))
## [1] 57
#how many 2020 fish also appeared in 2019 season?
sum(unique(all_sites_corn$PIT) %in% unique(total_corn$PIT))
## [1] 50
#how many fish were new to 2020 season?
sum(!(unique(all_sites_corn$PIT) %in% unique(total_corn$PIT)))
## [1] 83
#How many total detections for 2019 & 2020 in the pre-baiting and baiting periods?
nrow(total_natural)+ nrow(total_corn)+ nrow(all_sites_natural)+nrow(all_sites_corn)
## [1] 712697
###First detection date: earliest baiting-period detection per fish in 2020
#(output column name fixed: was misspelled "DATEIME")
first_detect2020<-all_sites_corn %>% group_by(PIT) %>% arrange(DATETIME) %>% summarize(DATETIME=first(DATETIME))
#Uniquely detected carp at each site.
#Per-site PIT sets during the baiting period (replaces 8 hand-expanded,
#near-identical expressions that were easy to mis-edit).
site_pits<-list(
  unique(s1_corn$PIT), unique(s2_corn$PIT), unique(s3_corn$PIT), unique(s4_corn$PIT),
  unique(s5_corn$PIT), unique(s6_corn$PIT), unique(s7_corn$PIT), unique(s8_corn$PIT))
#number of unique fish detected at each site
unique_fish<-vapply(site_pits, length, integer(1))
#how many fish are only detected at the one given site?
uniquely_detected<-vapply(seq_along(site_pits), function(i) {
  others<-unique(unlist(site_pits[-i])) #PITs seen at any OTHER site
  sum(!(site_pits[[i]] %in% others))
}, integer(1))
supp_table1<-data.frame(unique_fish, uniquely_detected)
supp_table1$percent_unique<-uniquely_detected/unique_fish*100
#' Multiplot function
# Draw several ggplot objects on a single page using a grid layout.
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# The layout matrix maps plot numbers to grid cells, e.g.
# matrix(c(1,2,3,3), nrow=2, byrow=TRUE) puts plot 1 upper left, plot 2 upper
# right, and plot 3 across the whole bottom row.
# (The 'file' argument is accepted but unused, kept for interface compatibility.)
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  # Collect plots from both the ... arguments and plotlist into one list
  all_plots <- c(list(...), plotlist)
  n_plots <- length(all_plots)
  # No layout supplied: build one from 'cols', filling cells row-wise
  if (is.null(layout)) {
    n_rows <- ceiling(n_plots / cols)
    layout <- matrix(seq_len(cols * n_rows),
                     ncol = cols, nrow = n_rows, byrow = TRUE)
  }
  if (n_plots == 1) {
    print(all_plots[[1]])
  } else {
    # Fresh page with a viewport grid matching the layout matrix
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    for (idx in seq_len(n_plots)) {
      # All (row, col) cells of the layout assigned to this plot number
      slot <- as.data.frame(which(layout == idx, arr.ind = TRUE))
      print(all_plots[[idx]],
            vp = viewport(layout.pos.row = slot$row,
                          layout.pos.col = slot$col))
    }
  }
}
#Bipartite network of carp (rows) x site readers (columns) from 2020 detections
edgelist<-data.frame(PIT=merged_all_sites$PIT, Reader=merged_all_sites$Site)
#sparse incidence matrix; one triplet entry per detection (repeated fish-site
#pairs accumulate, so entries count detections)
A <- spMatrix(nrow=length(unique(edgelist$PIT)),
ncol=length(unique(edgelist$Reader)),
i = as.numeric(factor(edgelist$PIT)),
j = as.numeric(factor(edgelist$Reader)),
x = rep(1, length(as.numeric(edgelist$PIT))) )
row.names(A) <- levels(factor(edgelist$PIT))
colnames(A) <- levels(factor(edgelist$Reader))
#using carp data
#NOTE(review): graph.incidence/bipartite.projection are deprecated igraph names;
#newer code would use graph_from_incidence_matrix()/bipartite_projection()
bi<-graph.incidence(A, mode="all") #undirected, named graph that is bipartite
pr<-bipartite.projection(bi, multiplicity=TRUE)
#co-membership network of nodes ($proj1), or a network of groups that share members ($proj2)
# get.adjacency(pr$proj1,sparse=FALSE,attr="weight")
h<-pr$proj2 #site-by-site network: edges weighted by shared fish detections
l3 <-layout_in_circle(h, order=c(6, 5, 4, 3, 2, 1, 8, 7))
plot.igraph(h,vertex.label=V(h)$name,layout=l3,edge.width=E(h)$weight/10^8, edge.color="black")
#Compare edgeweights between 5--6 and 6--7
E(h)$weight
## [1] 111851326 191928203 43356574 25291398 57497296 85615331 2238579
## [8] 54021798 92771892 312833260 46147318 209375800 163625305 68097686
## [15] 79969604 196088765 71872072 1445880 9622021 716227 167682352
## [22] 4723537 88431811 148497149 4045096 606465332 139179148 89390881
E(h)[23]
## + 1/28 edge from 4b9722c (vertex names):
## [1] 5--6
E(h)[26]
## + 1/28 edge from 4b9722c (vertex names):
## [1] 6--7
E(h)[26]$weight/E(h)[23]$weight
## [1] 6.858
# Version #2 including a legend for edge weights
n<-6 #number of edge-weight bins for the legend
size_vec<-seq_len(n)
sizeCut<-cut(E(h)$weight/10^8,n) #bin scaled weights into n equal intervals
edge_weight<-size_vec[sizeCut] #factor codes index the bin widths
setwd("~/Carp/Figures")
png(filename="Figure_1B.png")
plot.igraph(h, vertex.label=V(h)$name,layout=l3, edge.width=edge_weight, edge.color="black")
#rescale legend line widths to the same 1..n range as the plotted edges
scaled <- 1 + ((n-1) * (size_vec - min(size_vec) ) / ( max(size_vec) - min(size_vec) ) )
legend('topleft', legend=levels(sizeCut), lwd=scaled, col='black')
dev.off()
## png
## 2
#plot distance vs. edge weight
setwd("~/Carp/2020")
library(readxl)
#pairwise site distance matrix -> long format; "x" cells mark the unused triangle
distances<-read_excel("distance.xlsx") %>% rename(Site=`...1`) %>% pivot_longer(!Site, names_to = "Site2", values_to = "Distance") %>% filter(Distance!="x")%>% mutate(Distance=as.numeric(Distance)) %>%
unite("edge", Site:Site2, sep = "--", remove = FALSE)
#join network edge weights to physical distances via the "A--B" edge label
edges<-data.frame(Site=get.edgelist(h)[,1], Site2=get.edgelist(h)[,2], weight=E(h)$weight) %>%
unite("edge", Site:Site2, sep = "--", remove = FALSE) %>% left_join(distances, by=c("edge"))
C<- edges %>% ggplot(aes(x=Distance, y=weight))+ geom_point(alpha=0.5)+ theme_classic()+xlab("Distance (m)")+ ylab("Edge weight")+ geom_text(aes(label = edge), check_overlap= TRUE, colour="black",vjust="inward", hjust="inward", nudge_x=100)
#precomputed gmmevents outputs for Site 1 (events + group-by-individual matrix)
events_s1 <- readRDS("~/Carp/2020/events_s1.RDS")
gbi_s1 <- readRDS("~/Carp/2020/gbi_s1.RDS")
#time course of individual carp visits to each site during baiting period (July 3rd- July 23rd 2020)
#one point per detection: x = time, y = individual fish
A<-ggplot(data=s1_corn, aes(DATETIME, as.factor(PIT))) +
geom_point(alpha=0.5)+
scale_x_datetime(date_breaks="1 day")+
# ggtitle("Site 1")+
xlab("Date")+
ylab("Carp PIT ID")+
theme_bw()+
theme(axis.text.x=element_text(angle=90), axis.text.y=element_text(size=6), axis.title = element_text(size=10))
##add reference values for date/time
s1_corn<-s1_corn[order(s1_corn$DATETIME),] #order chronologically
# s1_corn<-s1_corn[1:2000,] #limit number of points for analysis
#difftime is vectorized: compute every row's offset from the first detection in
#one call (replaces a row-by-row for loop with identical results)
Time<-as.integer(difftime(s1_corn$DATETIME,s1_corn$DATETIME[1],units='secs')) #seconds since first detection
Date<-as.integer(difftime(s1_corn$DATETIME,s1_corn$DATETIME[1],units='days')) #whole days since first detection
s1_corn_gmm<-s1_corn
s1_corn_gmm$refTime<-Time
s1_corn_gmm$refDate<-Date
#subset data for a single day (3-hour evening window on July 16, 2020)
gmm_sub<- s1_corn_gmm %>% filter(DATETIME >=as.POSIXct("2020-07-16 17:00:00", tz="CDT") & DATETIME < as.POSIXct("2020-07-16 20:00:00", tz="CDT"))
min_time<-gmm_sub$refTime[1]
max_time<-gmm_sub$refTime[nrow(gmm_sub)]
#starting reference for noon 7/16/20= 1013418
#feeding events (from gmmevents) whose start/end fall inside the window
events_sub<-events_s1 %>% filter(Start >=min_time & End<=max_time & Location=="1") %>% arrange(Start)
#convert event offsets (seconds) back to clock time using the Site 1 origin
events_sub$DATETIME_start <- as.POSIXct(events_sub$Start, origin = "2020-07-04 18:29:42",tz = "CDT", format='%Y-%m-%d %H:%M:%S')
events_sub$DATETIME_end <- as.POSIXct(events_sub$End, origin = "2020-07-04 18:29:42",tz = "CDT", format='%Y-%m-%d %H:%M:%S')
#seq_len() instead of seq(1:n): identical here, robust if events_sub is ever empty
events_sub$Event<-seq_len(nrow(events_sub))
#detections overlaid with shaded rectangles marking inferred feeding events
B<-ggplot()+
geom_point(data=gmm_sub, aes(DATETIME, as.factor(PIT), alpha=0.5))+
scale_x_datetime(date_breaks="1 hour", date_labels = "%H:%M")+
# ggtitle("Site 1")+
xlab("Time (Hours) on July 16, 2020")+
ylab("Carp PIT ID")+
scale_fill_viridis_d()+
geom_rect(aes(xmin = DATETIME_start, xmax = DATETIME_end, fill = as.factor(Event)),
ymin = -Inf, ymax = Inf, alpha = 0.3,
data = events_sub)+
theme_bw()+
theme(axis.text.y=element_text(size=8), axis.title = element_text(size=10), legend.position = "none")
multiplot(A, B, cols=1)
#number of detections per day at Site 1 (table over calendar date)
events_by_day<-table(as.Date(s1_corn_gmm$DATETIME))
mean(events_by_day)
## [1] 2395.15
min(events_by_day)
## [1] 6
max(events_by_day)
## [1] 7831
#precomputed gmmevents outputs for the whole lake
events_parley2020 <- readRDS("~/Carp/2020/events_parley2020.RDS")
gbi_parley2020 <- readRDS("~/Carp/2020/gbi_parley2020.RDS")
#histogram of group size for all of Lake Parley
#one row per feeding bout: group size = fish present, dur = bout length (secs)
parley2020_gs<-data.frame(bout=1:nrow(gbi_parley2020), groupsize=rowSums(gbi_parley2020))
parley2020_gs$dur<-events_parley2020$End-events_parley2020$Start
parley2020_gs$dur_min<-parley2020_gs$dur/60
head(parley2020_gs)
## bout groupsize dur dur_min
## 1 1 2 1139 18.983333
## 2 2 3 443 7.383333
## 3 3 2 270 4.500000
## 4 4 2 751 12.516667
## 5 5 2 174 2.900000
## 6 6 2 494 8.233333
A<-ggplot(parley2020_gs, aes(groupsize)) +
geom_histogram(binwidth=1,boundary=-0.5, color="black", fill=col_year[2]) +
scale_x_continuous(breaks=0:20)+
scale_y_continuous(trans = "log10")+
xlab("Number of carp per feeding bout")+
ylab(expression(log[10]*"(Number of observations)"))+
ggtitle("(A)")+
theme_bw()
# ggtitle("Lake Parley: Average group size")
A
#histogram of feeding bout duration for all of Lake Parley
# ggplot(parley2020_gs, aes(dur_min)) +
# geom_histogram(binwidth=1,boundary=-0.5, color="black", fill="darkcyan") +
# scale_x_continuous()+
# xlab("Duration of feeding bout (min)")+
# ylab("Number of times observed")+
# ggtitle("Lake Parley: Duration of feeding bout (min)")
B<- ggplot(parley2020_gs, aes(dur_min)) +
geom_histogram(color="black", fill=col_year[2]) +
scale_x_continuous(trans = "log2")+
scale_y_continuous(trans = "log10")+
xlab(expression(log[2]*"(Duration of feeding bout) (min)"))+
ylab(expression(log[10]*"(Number of observations)"))+
ggtitle("(B)")+
theme_bw()
# ggtitle("Lake Parley: Duration of feeding bout (min)")
B
#QQ plot of group size vs. bout duration Boat Ramp (separate antenna)
# C<-ggplot(parley2020_gs, aes(x=dur_min, y=(as.factor(groupsize)))) +
# # geom_point(size=2, shape=19, alpha=0.6)+
# geom_density() +
# scale_x_continuous(trans = "log2")+
# xlab("Bout duration (min)")+
# ylab("Group size")+
# ggtitle("(C)")+
# theme_bw()
# # ggtitle("Lake Parley")
#boxplots of bout duration by group size
C <- ggplot(parley2020_gs,aes(x=as.factor(groupsize),y=dur_min, fill = as.factor(groupsize)))+
# geom_flat_violin(position = position_nudge(x = .2, y = 0),adjust =2)+
geom_boxplot()+
# geom_point(position = position_jitter(width = .15), size = .25, alpha=0.05)+
coord_flip()+theme_cowplot()+
scale_y_continuous(trans = "log2")+
ylab("Bout duration (min)")+
xlab("Group size")+
# ggtitle("(C)")+
theme(legend.position = "none")
C
# multiplot(A,B,C, cols=1)
top_row <- plot_grid(A, B) #, labels=c("(A)", "(B)"))
plot_grid(top_row, C, labels=c("", "(C)"), ncol = 1)
mean(parley2020_gs$dur_min)
## [1] 5.111328
median(parley2020_gs$dur_min)
## [1] 3.066667
max(parley2020_gs$dur_min)
## [1] 518.6333
#NOTE(review): the " CDT" suffix inside the string is ignored by as.POSIXct;
#these parse in the system time zone — confirm consistent with detections above
start_date<-as.POSIXct("2020-07-03 12:00:00 CDT") #rounded to nearest midday
end_date<-as.POSIXct("2020-07-23 12:00:00 CDT")
num_days<-as.numeric(end_date-start_date) #20-day baiting window (difftime in days)
#Histogram of the number of sampling days each fish visits Site 1
#calc_nvisit() is defined elsewhere; presumably returns fishID + nvisit — verify
superfish_s1<-calc_nvisit(df=s1_corn, num_days=num_days, start_date=start_date)
A<- ggplot(superfish_s1,aes(nvisit)) +
geom_histogram(binwidth=1,boundary=-0.5, color="black", fill=col_year[2]) +
scale_x_continuous(breaks=0:20)+
xlab("Number of sampling days")+
ylab("Number of unique fish")+
ggtitle("(A)")+
theme_bw()
# ggtitle("(Site 1: Baiting Period")
#per-site daily-visit counts for the remaining sites
superfish_s2<-calc_nvisit(df=s2_corn, num_days=num_days, start_date=start_date)
superfish_s3<-calc_nvisit(df=s3_corn, num_days=num_days, start_date=start_date)
superfish_s4<-calc_nvisit(df=s4_corn, num_days=num_days, start_date=start_date)
superfish_s5<-calc_nvisit(df=s5_corn, num_days=num_days, start_date=start_date)
superfish_s6<-calc_nvisit(df=s6_corn, num_days=num_days, start_date=start_date)
superfish_s7<-calc_nvisit(df=s7_corn, num_days=num_days, start_date=start_date)
superfish_s8<-calc_nvisit(df=s8_corn, num_days=num_days, start_date=start_date)
#Combined plot showing site preference and number of detected visits by fish
superfish_s1$Site<-"Site1"
superfish_s2$Site<-"Site2"
superfish_s3$Site<-"Site3"
superfish_s4$Site<-"Site4"
superfish_s5$Site<-"Site5"
superfish_s6$Site<-"Site6"
superfish_s7$Site<-"Site7"
superfish_s8$Site<-"Site8"
#base rbind() has no Fill argument: rbind(..., Fill=NA) appended a bogus all-NA
#row (removed again only by the na.omit below); plain rbind gives the same result
all_superfish <- rbind(superfish_s1,superfish_s2,superfish_s3,superfish_s4,superfish_s5,superfish_s6,superfish_s7,superfish_s8)
all_superfish<-na.omit(all_superfish)
superfish_wide <- spread(all_superfish, Site, nvisit) #convert from long to wide data format
# superfish_wide <- subset(superfish_wide,select = fishID:Site8)
superfish_wide[is.na(superfish_wide)] <- 0 #convert NAs to zero
superfish_wide$total <- rowSums(superfish_wide[2:9]) #total daily visits across all 8 sites
superfish_wide<-superfish_wide[order(superfish_wide$total, decreasing=TRUE),]
# superfish_wide$short_lab<-LETTERS[seq( from = 1, to = nrow(superfish_wide))]
#order fish factor levels by total visits so plots rank busiest fish first
all_superfish$fishID <- factor(all_superfish$fishID, levels = superfish_wide$fishID[order(superfish_wide$total, decreasing=TRUE)])
top20<-round(length(levels(all_superfish$fishID))*.2) #top 20% of fish = "superfeeders"
cutoff<-levels(all_superfish$fishID)[top20]
#stacked bar of daily visits per fish, colored by site; vertical line = top-20% cutoff
B<-ggplot(all_superfish, aes(fill=Site, y=nvisit, x=fishID))+
geom_col()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line=element_blank(), axis.text.x=element_blank(), axis.ticks = element_blank())+
scale_fill_viridis(discrete=TRUE, name = "Site", labels = c("Site 1", "Site 2", "Site 3", "Site 4", "Site 5", "Site 6", "Site 7", "Site 8"))+
labs(y= "Visits per unique day", x = "Individual carp")+
geom_vline(xintercept=which(levels(all_superfish$fishID) == cutoff))+
ggtitle("(B)")
B
levels(all_superfish$fishID)
## [1] "497766" "497661" "775172" "497649" "497690" "497871" "775215" "497890"
## [9] "775203" "497771" "497672" "497896" "775194" "38368" "38397" "497645"
## [17] "497686" "497608" "775239" "497601" "497815" "497862" "497869" "497891"
## [25] "775204" "775219" "775222" "775241" "497631" "775170" "497637" "497764"
## [33] "497827" "38316" "38364" "497892" "497666" "497673" "775186" "38320"
## [41] "38372" "497651" "497681" "775217" "38357" "497697" "38305" "497633"
## [49] "497826" "38389" "497653" "38398" "38314" "38339" "38355" "497604"
## [57] "497778" "497626" "497641" "497659" "497894" "38385" "497635" "497642"
## [65] "497794" "497625" "497627" "497746" "497840" "497867" "775182" "775213"
## [73] "38333" "38338" "497600" "497667" "497717" "775176" "38306" "38326"
## [81] "38348" "497716" "497805" "775196" "775234" "775249" "38322" "38329"
## [89] "497656" "497736" "497809" "497811" "497898" "775177" "775235" "38341"
## [97] "38358" "497620" "497655" "497664" "497688" "497692" "497700" "497818"
## [105] "497828" "497833" "497851" "497875" "775168" "775173" "775174" "775187"
## [113] "38345" "497617" "497621" "497623" "497632" "497654" "497658" "497687"
## [121] "497696" "497729" "497780" "497804" "497807" "497822" "497841" "497843"
## [129] "497857" "497861" "497874" "497877" "497883"
#zoomed view of just the top-20% ("superfeeder") fish, used as an inset
top20_ID<-levels(all_superfish$fishID)[1:top20]
top20_superfish<-all_superfish[which(all_superfish$fishID %in% top20_ID),]
inset.plot<- ggplot(top20_superfish, aes(fill=Site, y=nvisit, x=fishID))+
geom_col()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.ticks = element_blank(), axis.text.x = element_text(size=6, angle = 90, hjust = 1), legend.position = "none")+
scale_fill_viridis(discrete=TRUE, name = "Site", labels = c("Site 1", "Site 2", "Site 3", "Site 4", "Site 5", "Site 6", "Site 7", "Site 8"))+
labs(y= "Visits", x = "")
inset.plot
library(cowplot)
plot.with.inset <-
ggdraw() +
draw_plot(B) +
draw_plot(inset.plot, x = 0.3, y = .45, width = .55, height = .55)
plot.with.inset
#scatter plot of number of visits vs % of time spent at preferred/dominate/most detected site
superfeeder_bool<-data.frame(fishID=all_superfish$fishID, top20=(all_superfish$fishID %in% top20_ID))
superfish_wide$max_per_site<- apply(X=superfish_wide[,2:9], MARGIN=1, FUN=max) #visits at each fish's favorite site
superfish_wide$percentage_max<-superfish_wide$max_per_site/superfish_wide$total*100
superfish_wide$top20<-superfeeder_bool$top20[match(superfish_wide$fishID,superfeeder_bool$fishID)]
C<- ggplot(superfish_wide, aes(x=total, y=percentage_max))+
geom_point(aes(colour = top20, shape=top20), alpha=0.5)+
# geom_abline(intercept = 0, slope = 100/30)+
xlab("Total number of visits")+
ylab("Proportion of visits at top visited Site")+
ggtitle("(C)")+
# ggtitle("Relative 'Site Fidelity' for Superfeeders")+
geom_text(aes(label = ifelse(top20 == 'TRUE', fishID, NA)), check_overlap= TRUE, colour="black", vjust="inward", hjust="inward")+
scale_shape_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=c(15,17))+
scale_colour_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=col_sfeeder)+
theme_bw()
C
multiplot(A, plot.with.inset, C, cols=1)
top20_ID_2020<-top20_ID #keep 2020 superfeeder IDs for cross-year comparison
#Lorenz curve applied to unique daily site visits
#rank fish by activity; cumulative visit share vs cumulative population share
lorenz<-all_superfish %>% group_by(fishID) %>% summarize(nvisit=sum(nvisit)) %>% arrange(nvisit) %>% mutate(forage_activity_rank= 1:nrow(.), prop_forager= forage_activity_rank/nrow(.), cum_visits=cumsum(nvisit)/sum(nvisit), one_to_one= prop_forager)
fwrite(lorenz, "/research-home/lwhite/Carp/Superfeeder_rankings/daily_visits2020.csv")
#Gini coefficient calculation
#areaB = area under the Lorenz curve (origin prepended); A = gap to equality line
areaB<-AUC(x=append(0, lorenz$prop_forager), y=append(0, lorenz$cum_visits), method="trapezoid")
areaA<-0.5-areaB
(gini_coeff<-areaA/(areaA+areaB))
## [1] 0.498762
lorenzA<-lorenz %>% ggplot()+geom_line(aes(x=prop_forager, y=cum_visits))+ geom_line(aes(x=prop_forager, y=one_to_one), lty="dashed")+ theme_bw()+
geom_vline(xintercept = 0.8 , lty="dotted", col="dark red") +
geom_hline(yintercept = 0.5 , lty="dotted", col="dark red") +
geom_vline(xintercept = 0.59 , lty="dotted", col="dark blue") +
geom_hline(yintercept = 0.2 , lty="dotted", col="dark blue") +
xlab("Cumulative Fraction of Population Based on Foraging Ranking")+ ylab("Cumulative Number of\nUnique Daily Visits")+ scale_x_continuous(limits = c(0,1))+ scale_y_continuous(limits = c(0,1)) + ggtitle(paste("Lorenz Curve: 2020 Unique Daily Visits\n(Gini Coefficient:", round(gini_coeff,3), ")"))
#Lorenz curve applied to cumulative total detections
#plain rbind: rbind(..., Fill=NA) appended a bogus all-NA row (base rbind has no
#Fill argument); the trailing na.omit gives the same result either way
raw_superfish <- rbind(s1_corn, s2_corn, s3_corn, s4_corn, s5_corn, s6_corn, s7_corn, s8_corn) %>% group_by(PIT) %>% summarise(count = n()) %>% arrange(count) %>% na.omit()
#rank fish by detection count; cumulative share vs population share
lorenz<-raw_superfish %>% mutate(forage_activity_rank= 1:nrow(.), prop_forager= forage_activity_rank/nrow(.), cum_visits=cumsum(count)/sum(count), one_to_one= prop_forager)
fwrite(lorenz, "/research-home/lwhite/Carp/Superfeeder_rankings/total_detections2020.csv")
#Gini coefficient calculation (areaB = area under Lorenz curve, origin prepended)
areaB<-AUC(x=append(0, lorenz$prop_forager), y=append(0, lorenz$cum_visits), method="trapezoid")
areaA<-0.5-areaB
(gini_coeff<-areaA/(areaA+areaB))
## [1] 0.7922697
lorenzB<-lorenz %>% ggplot()+geom_line(aes(x=prop_forager, y=cum_visits))+ geom_line(aes(x=prop_forager, y=one_to_one), lty="dashed")+ theme_bw()+
geom_vline(xintercept = 0.8 , lty="dotted", col="dark red") +
geom_hline(yintercept = 0.17 , lty="dotted", col="dark red") +
geom_vline(xintercept = 0.82 , lty="dotted", col="dark blue") +
geom_hline(yintercept = 0.2 , lty="dotted", col="dark blue") +
xlab("Cumulative Fraction of Population Based on Foraging Ranking")+ ylab("Cumulative Number of\nTotal Detections")+ scale_x_continuous(limits = c(0,1))+ scale_y_continuous(limits = c(0,1))+ ggtitle(paste("Lorenz Curve: 2020 Total Visits\n(Gini Coefficient:", round(gini_coeff,3), ")"))
start_date<-as.POSIXct("2020-07-03 12:00:00 CDT") #rounded to nearest midday
end_date<-as.POSIXct("2020-07-23 12:00:00 CDT")
#Lorenz curve applied to cumulative total detections (restricted to the window)
#plain rbind: rbind(..., Fill=NA) appended a bogus all-NA row; the DATETIME
#filter and na.omit give the same result either way
raw_superfish <- rbind(s1_corn, s2_corn, s3_corn, s4_corn, s5_corn, s6_corn, s7_corn, s8_corn) %>% filter(DATETIME >= start_date & DATETIME <= end_date) %>% group_by(PIT) %>% summarise(count = n()) %>% arrange(count) %>% na.omit()
lorenz<-raw_superfish %>% mutate(forage_activity_rank= 1:nrow(.), prop_forager= forage_activity_rank/nrow(.), cum_visits=cumsum(count)/sum(count), one_to_one= prop_forager)
# fwrite(lorenz, "/research-home/lwhite/Carp/Superfeeder_rankings/total_detections2020.csv")
#Gini coefficient calculation (areaB = area under Lorenz curve, origin prepended)
areaB<-AUC(x=append(0, lorenz$prop_forager), y=append(0, lorenz$cum_visits), method="trapezoid")
areaA<-0.5-areaB
(gini_coeff<-areaA/(areaA+areaB))
## [1] 0.7922793
lorenzB<-lorenz %>% ggplot()+geom_line(aes(x=prop_forager, y=cum_visits))+ geom_line(aes(x=prop_forager, y=one_to_one), lty="dashed")+ theme_bw()+
geom_vline(xintercept = 0.8 , lty="dotted", col="dark red") +
geom_hline(yintercept = 0.17 , lty="dotted", col="dark red") +
geom_vline(xintercept = 0.82 , lty="dotted", col="dark blue") +
geom_hline(yintercept = 0.2 , lty="dotted", col="dark blue") +
xlab("Cumulative Fraction of Population Based on Foraging Ranking")+ ylab("Cumulative Number of\nTotal Detections")+ scale_x_continuous(limits = c(0,1))+ scale_y_continuous(limits = c(0,1))+ ggtitle("(A)")
#ggtitle(paste("Lorenz Curve: 2020 Total Visits\n(Gini Coefficient:", round(gini_coeff,3), ")"))
#Plot total visits as a summation of visits across sites by FishID
# Per-fish, per-site detection counts for the 2020 baiting window (long format).
all_superfish <- rbind(s1_corn, s2_corn, s3_corn, s4_corn, s5_corn, s6_corn, s7_corn, s8_corn, Fill=NA) %>% filter(DATETIME >= start_date & DATETIME <= end_date) %>% group_by(PIT, Site) %>% summarise(nvisit = n()) %>% arrange(nvisit) %>% na.omit() %>% rename(fishID=PIT)
# NOTE(review): tidyr::spread() is superseded by pivot_wider(); kept for
# consistency with the rest of the script.
superfish_wide <- spread(all_superfish, Site, nvisit) #convert from long to wide data format
# superfish_wide <- subset(superfish_wide,select = fishID:Site8)
superfish_wide[is.na(superfish_wide)] <- 0 #convert NAs to zero
# Columns 2:9 are the eight site-count columns; total = all-site detections.
superfish_wide$total <- rowSums(superfish_wide[2:9])
superfish_wide<-superfish_wide[order(superfish_wide$total, decreasing=TRUE),]
# superfish_wide$short_lab<-LETTERS[seq( from = 1, to = nrow(superfish_wide))]
# Order the factor levels by total detections so bars plot most- to least-active.
all_superfish$fishID <- factor(all_superfish$fishID, levels = superfish_wide$fishID[order(superfish_wide$total, decreasing=TRUE)])
# top20<-round(length(levels(all_superfish$fishID))*.18)
# Superfeeder cutoff taken from the Lorenz ranking computed above: the first
# fish past the 82% population fraction; `cutoff` is that fish's PIT tag.
top20<-which(lorenz$prop_forager>0.82)[1]
# cutoff<-levels(all_superfish$fishID)[top20]
cutoff<-lorenz$PIT[top20]
# Stacked bar chart (panel B): per-fish detections coloured by site, with a
# vertical line at the superfeeder cutoff.
B<-ggplot(all_superfish, aes(fill=Site, y=nvisit, x=fishID))+
geom_col()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line=element_blank(), axis.text.x=element_blank(), axis.ticks = element_blank())+
# scale_y_continuous(trans = "log2")+
scale_fill_viridis(discrete=TRUE, name = "Site", labels = c("Site 1", "Site 2", "Site 3", "Site 4", "Site 5", "Site 6", "Site 7", "Site 8"))+
labs(y= "Total cumulative detections", x = "Individual carp")+
geom_vline(xintercept=which(levels(all_superfish$fishID) == cutoff))+
ggtitle("(B)")
B
levels(all_superfish$fishID)
## [1] "38397" "775194" "497673" "497890" "497862" "497637" "497649" "497645"
## [9] "497672" "497766" "497666" "497815" "775172" "775215" "497661" "497690"
## [17] "38320" "497871" "775204" "497601" "775170" "497896" "497608" "775217"
## [25] "775222" "497631" "38368" "38398" "497892" "497697" "497716" "497686"
## [33] "497869" "497651" "775239" "497681" "775219" "497771" "775241" "497891"
## [41] "38339" "38372" "775182" "38329" "497833" "775203" "38316" "38357"
## [49] "497764" "497826" "38338" "38355" "775186" "497604" "38364" "497717"
## [57] "497827" "497778" "497633" "497867" "38314" "497635" "497653" "775196"
## [65] "38389" "497625" "497627" "497794" "497894" "497600" "38333" "497840"
## [73] "38322" "38305" "38385" "775168" "497822" "497626" "497667" "497656"
## [81] "497746" "497729" "775174" "497809" "775213" "775176" "497642" "775234"
## [89] "497877" "497700" "497736" "38326" "497659" "497811" "497780" "497664"
## [97] "38306" "38358" "497898" "38348" "497641" "775235" "497804" "497805"
## [105] "497875" "497851" "775173" "38341" "497861" "775249" "497687" "497696"
## [113] "775177" "497620" "497841" "497621" "497818" "38345" "497828" "497655"
## [121] "497692" "497654" "497688" "497857" "497874" "497617" "497883" "775187"
## [129] "497632" "497623" "497658" "497843" "497807"
# Subset to the superfeeders (every fish ranked at or above the cutoff).
top20_ID<-levels(all_superfish$fishID)[1:which(levels(all_superfish$fishID)==cutoff)]
top20_superfish<-all_superfish[which(all_superfish$fishID %in% top20_ID),]
# Inset: zoomed-in stacked bars for superfeeders only (labels shown, no legend).
inset.plot<- ggplot(top20_superfish, aes(fill=Site, y=nvisit, x=fishID))+
geom_col()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.ticks = element_blank(), axis.text.x = element_text(size=6, angle = 90, hjust = 1), legend.position = "none")+
scale_fill_viridis(discrete=TRUE, name = "Site", labels = c("Site 1", "Site 2", "Site 3", "Site 4", "Site 5", "Site 6", "Site 7", "Site 8"))+
labs(y= "Visits", x = "")
inset.plot
library(cowplot)
# Overlay the inset on the full bar chart via cowplot's drawing layer.
plot.with.inset <-
ggdraw() +
draw_plot(B) +
draw_plot(inset.plot, x = 0.3, y = .45, width = .55, height = .55)
plot.with.inset
#scatter plot of number of visits vs % of time spent at preferred/dominate/most detected site
# Lookup table: one row per (fish, site) pair; match() below takes the first
# hit per fish, which is fine because top20 is constant within a fish.
superfeeder_bool<-data.frame(fishID=all_superfish$fishID, top20=(all_superfish$fishID %in% top20_ID))
# Site fidelity: share of a fish's detections at its single most-visited site.
superfish_wide$max_per_site<- apply(X=superfish_wide[,2:9], MARGIN=1, FUN=max)
superfish_wide$percentage_max<-superfish_wide$max_per_site/superfish_wide$total*100
superfish_wide$top20<-superfeeder_bool$top20[match(superfish_wide$fishID,superfeeder_bool$fishID)]
# Panel C: total visits vs. site fidelity, superfeeders highlighted and labelled.
C<- ggplot(superfish_wide, aes(x=total, y=percentage_max))+
geom_point(aes(colour = top20, shape=top20), alpha=0.5)+
# geom_abline(intercept = 0, slope = 100/30)+
xlab("Total number of visits")+
ylab("Proportion of visits at top \nvisited Site")+
ggtitle("(C)")+
# ggtitle("Relative 'Site Fidelity' for Superfeeders")+
geom_text(aes(label = ifelse(top20 == 'TRUE', fishID, NA)), check_overlap= TRUE, colour="black", vjust="inward", hjust="inward")+
scale_shape_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=c(15,17))+
scale_colour_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=col_sfeeder)+
theme_bw()
C
multiplot(lorenzB, plot.with.inset, C, cols=1)
# Keep the 2020 superfeeder IDs for the 2019-vs-2020 overlap comparison below.
top20_ID_2020<-top20_ID
# Compare site fidelity between superfeeders and the remaining 80%.
site_fidelity_top20<-superfish_wide %>% filter(top20==TRUE) %>% select(percentage_max)
site_fidelity_remaining80 <-superfish_wide %>% filter(top20==FALSE) %>% select(percentage_max)
mean(site_fidelity_top20$percentage_max)
## [1] 83.21445
mean(site_fidelity_remaining80$percentage_max, na.rm=TRUE)
## [1] 86.7702
sd(site_fidelity_top20$percentage_max)
## [1] 18.22688
sd(site_fidelity_remaining80$percentage_max)
## [1] 16.94116
# Welch two-sample t-test (unequal variances): no significant difference.
t.test(site_fidelity_top20$percentage_max, site_fidelity_remaining80$percentage_max, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: site_fidelity_top20$percentage_max and site_fidelity_remaining80$percentage_max
## t = -0.87601, df = 32.333, p-value = 0.3875
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.820332 4.708837
## sample estimates:
## mean of x mean of y
## 83.21445 86.77020
# Snapshot of the 2020 wide table before it is reused for 2019 below.
superfish_wide2020<-superfish_wide
# Network of Parley: build an HWI (half-weight index) association network from
# the 2020 group-by-individual (GBI) matrix and flag superfeeder nodes.
identities<-colnames(gbi_parley2020)
# get_network() returns a fresh association matrix, so no pre-allocation is
# needed (a zero-matrix placeholder here was dead code and has been removed).
network<- get_network(gbi_parley2020, data_format="GBI",
association_index="HWI", identities=identities, times=events_parley2020$midpoint, start_time=min(events_parley2020$midpoint), end_time=max(events_parley2020$midpoint))
## Generating 133 x 133 matrix
rownames(network)<-identities
colnames(network)<-identities
# Undirected weighted graph; self-loops dropped. NOTE(review): graph.adjacency()
# and graph.strength() are deprecated aliases of graph_from_adjacency_matrix()
# and strength() in current igraph -- migrate when the igraph version allows.
net <- graph.adjacency(network, mode="undirected", diag=FALSE, weighted=TRUE)
deg_weighted <- graph.strength(net) # weighted degree = sum of HWI edge weights
deg<-igraph::degree(net)
hist(deg_weighted, main="Histogram of Weighted Degree for Parley Network")
hist(deg, main="Histogram of Degree for Parley Network")
# Node size scales with (unweighted) degree.
V(net)$size <- deg #deg_weighted*10
# Tag each node with superfeeder status as "TRUE"/"FALSE" (NA if unmatched),
# then map that onto node colours (superfeeders blue, others gray).
V(net)$top20=as.character(superfeeder_bool$top20[match(V(net)$name,superfeeder_bool$fishID)])
V(net)$color=V(net)$top20
V(net)$color=gsub(FALSE,"gray",V(net)$color)
V(net)$color=gsub(TRUE,"blue",V(net)$color)
#plot(net, vertex.color=V(net)$color, vertex.label= NA)
# install.packages("ggnetwork")
library(ggnetwork)
# Lay out the network with ggnetwork for ggplot2 rendering.
n<-ggnetwork(net)
# Panel A: network diagram, node size = degree, colour = superfeeder status.
A<-ggplot(n, aes(x = x, y = y, xend = xend, yend = yend)) +
geom_edges(color = "lightgrey", alpha=0.4) +
geom_nodes(aes(color = top20, size = size)) +
scale_color_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=col_sfeeder)+
scale_size_continuous(name="Degree")+
theme_blank()+
ggtitle("(A)")
# Per-node network metrics. NOTE(review): `nodes=as.character(V(net))` stores
# vertex indices, not PIT tags; the tag IDs come from rownames via node_names
# below -- confirm `nodes` is not used downstream as an identifier.
network_stats_parley2020<-data.frame(nodes=as.character(V(net)), weighted_degree=igraph::graph.strength(net), degree=igraph::degree(net), betweenness=igraph::betweenness(net),transitivity=
igraph::transitivity(net, type="localundirected"), top20=superfeeder_bool$top20[match(V(net)$name,superfeeder_bool$fishID)])
network_stats_parley2020$node_names<-rownames(network_stats_parley2020)
# Long format for faceting by metric. NOTE(review): gather() is superseded by
# pivot_longer(); kept for consistency with the rest of the script.
parley2020_long<-gather(network_stats_parley2020, key=network_metric, value=value,
weighted_degree:transitivity)
# ggplot(network_stats_parley2020, aes(x=weighted_degree, y=betweenness, color=top20))+
# geom_point(aes(size=degree),alpha=0.5)+
# xlab("Weighted degree")+
# ylab("Betweenness")+
# ggtitle("Parley (all sites): Weighted degree vs. Betweenness")+
# geom_text(aes(label = ifelse(top20 == 'TRUE', node_names, NA)), check_overlap= TRUE, colour="black")
# # geom_text(aes(label=node_names),hjust=0, vjust=0)
# # geom_text(data=filter(network_stats_parley2020, top20=="TRUE"))
# Panel B: degree vs. betweenness scatter with superfeeders labelled.
B<-ggplot(network_stats_parley2020, aes(x=degree, y=betweenness))+
geom_point(aes(colour = top20, shape=top20), alpha=0.5)+
xlab("Degree")+
ylab("Betweenness")+
ggtitle("(B)")+
# ggtitle("Parley (all sites): Degree vs. Betweenness")+
geom_text(aes(label = ifelse(top20 == 'TRUE', node_names, NA)), check_overlap= TRUE, colour="black",vjust="inward", hjust="inward")+
scale_shape_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=c(15,17))+
scale_color_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=col_sfeeder)+
theme_bw()
B
plot_grid(A, B, labels = c('A', 'B'))
multiplot(A,B, cols=2)
# Split network metrics into superfeeders (top 20%) vs. the remaining 80%.
network_stats_top20<- network_stats_parley2020 %>% filter(top20==TRUE)
# BUG FIX: this previously filtered top20==TRUE a second time, so the
# "bottom80" subset silently duplicated the superfeeder subset.
network_stats_bottom80 <- network_stats_parley2020 %>% filter(top20==FALSE)
#Degree
degree_top20<-network_stats_parley2020 %>% filter(top20==TRUE) %>% select(degree)
degree_remaining80<-network_stats_parley2020 %>% filter(top20==FALSE) %>% select(degree)
mean(degree_top20$degree)
## [1] 34.91667
mean(degree_remaining80$degree)
## [1] 10.6055
sd(degree_top20$degree)
## [1] 9.784267
sd(degree_remaining80$degree)
## [1] 9.139878
# Welch two-sample t-test (unequal variances): superfeeders have higher degree.
t.test(degree_top20, degree_remaining80, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: degree_top20 and degree_remaining80
## t = 11.149, df = 32.432, p-value = 1.247e-12
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 19.87165 28.75068
## sample estimates:
## mean of x mean of y
## 34.91667 10.60550
#Weighted degree
# Same superfeeder vs. remaining-80% comparison for weighted degree.
# NOTE(review): the t.test() calls below are fed one-column data frames from
# select(); consider passing the vectors (e.g. weighted_top20$weighted_degree)
# as done in the site-fidelity comparison above.
weighted_top20<-network_stats_parley2020 %>% filter(top20==TRUE) %>% select(weighted_degree)
weighted_remaining80<-network_stats_parley2020 %>% filter(top20==FALSE) %>% select(weighted_degree)
mean(weighted_top20$weighted_degree)
## [1] 1.249238
mean(weighted_remaining80$weighted_degree)
## [1] 0.2635821
sd(weighted_top20$weighted_degree)
## [1] 0.288219
sd(weighted_remaining80$weighted_degree)
## [1] 0.3019831
# Welch two-sample t-test: superfeeders have higher weighted degree.
t.test(weighted_top20, weighted_remaining80, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: weighted_top20 and weighted_remaining80
## t = 15.035, df = 35.027, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.852569 1.118743
## sample estimates:
## mean of x mean of y
## 1.2492380 0.2635821
#Betweenness
# Same comparison for betweenness centrality.
betweenness_top20<-network_stats_parley2020 %>% filter(top20==TRUE) %>% select(betweenness)
betweenness_remaining80<-network_stats_parley2020 %>% filter(top20==FALSE) %>% select(betweenness)
mean(betweenness_top20$betweenness)
## [1] 604.2083
mean(betweenness_remaining80$betweenness)
## [1] 39.83486
sd(betweenness_top20$betweenness)
## [1] 513.8187
sd(betweenness_remaining80$betweenness)
## [1] 98.36562
# Welch two-sample t-test: superfeeders have higher betweenness.
t.test(betweenness_top20, betweenness_remaining80, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: betweenness_top20 and betweenness_remaining80
## t = 5.3594, df = 23.372, p-value = 1.825e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 346.7251 782.0218
## sample estimates:
## mean of x mean of y
## 604.20833 39.83486
Total daily detections of unique carp at all sites in Parley Lake for the pre-baiting and baiting periods in (A) 2019 and (B) 2020.
#histogram of number of unique visitors by sampling day
#2019
start_date2019<-as.POSIXct("2019-07-23 12:00:00 CDT")
end_date2019<-as.POSIXct("2019-08-16 15:00:00 CDT")
# Explicit units: a bare POSIXct difference picks units automatically (days
# here, but hours/minutes for shorter spans), which would silently corrupt
# num_days. floor() drops the fractional 3-hour remainder, matching how the
# fraction was previously truncated by `1:num_days` and `rep()`.
num_days<-floor(as.numeric(difftime(end_date2019, start_date2019, units="days")))
# One row per sampling day; uniqueID filled in by the loop below.
sampling_df <-data.frame(day=1:num_days, date=seq(as.Date(start_date2019), by = "day", length.out = num_days), uniqueID=rep(NA, num_days))
# Preallocate: one entry per day holding the per-fish site counts for that day.
degree_list<-vector("list", num_days)
for(i in 1:num_days){
# Detections in the i-th 24-hour window (86400 seconds).
sub_combined<-merged_2019 %>% filter(DATETIME >= as.POSIXct(start_date2019+ (i-1)*86400, tz="CDT") & DATETIME <= as.POSIXct(start_date2019 + i*86400, tz="CDT")) #86400 seconds/24 hours
# print(sub_combined)
sampling_df$uniqueID[i]<-length(unique(sub_combined$PIT))
PITs<-unique(sub_combined$PIT)
if(length(PITs)>0){
# For each fish seen today, count how many distinct antennas detected it.
carp.degree<-data.frame(PITs, numsites= rep(NA, length(PITs)))
for(j in 1:length(PITs)){
carp.degree$numsites[j]<-length(unique(sub_combined$Antenna[which(sub_combined$PIT==PITs[j])]))
}
degree_list[[i]]<-carp.degree
}
}
sampling_df$uniqueID<-as.numeric(sampling_df$uniqueID)
# Panel A: daily unique-fish counts for 2019; dashed line marks baiting start.
A<-ggplot(sampling_df, aes(x=date, y=uniqueID))+
geom_bar(stat="identity",fill=col_year[1])+
geom_vline(aes(xintercept = as.Date("2019-07-30")),
linetype = 4, colour = "black")+
xlab("Sampling Day (2019)")+
ylab("Unique fish detected per day")+
scale_x_date(date_breaks = "1 day", date_labels = "%b %d")+
theme_bw()+
theme(axis.text.x = element_text(angle = 90))
# 2020
# NOTE(review): tz="CDT" is not a valid Olson name on most systems (the whole
# script uses it); timestamps likely fall back to the system zone -- verify.
start_date2020<-as.POSIXct("2020-06-23 12:00:00 CDT")
# " CDT" suffix added for consistency with every other timestamp literal in the
# script; the default parse format ignores the trailing zone text either way.
end_date2020<-as.POSIXct("2020-07-23 12:00:00 CDT")
# Explicit difftime units (see the 2019 block above for rationale).
num_days<-floor(as.numeric(difftime(end_date2020, start_date2020, units="days")))
# One row per sampling day; uniqueID filled in by the loop below.
sampling_df <-data.frame(day=1:num_days, date=seq(as.Date(start_date2020), by = "day", length.out = num_days), uniqueID=rep(NA, num_days))
# Preallocate: one entry per day holding the per-fish site counts for that day.
degree_list<-vector("list", num_days)
for(i in 1:num_days){
# Detections in the i-th 24-hour window (86400 seconds).
sub_combined<-merged_all_sites %>% filter(DATETIME >= as.POSIXct(start_date2020+ (i-1)*86400, tz="CDT") & DATETIME <= as.POSIXct(start_date2020 + i*86400, tz="CDT")) #86400 seconds/24 hours
# print(sub_combined)
sampling_df$uniqueID[i]<-length(unique(sub_combined$PIT))
PITs<-unique(sub_combined$PIT)
if(length(PITs)>0){
# For each fish seen today, count how many distinct antennas detected it.
carp.degree<-data.frame(PITs, numsites= rep(NA, length(PITs)))
for(j in 1:length(PITs)){
carp.degree$numsites[j]<-length(unique(sub_combined$Antenna[which(sub_combined$PIT==PITs[j])]))
}
degree_list[[i]]<-carp.degree
}
}
# (A duplicated as.numeric() conversion was removed here.)
sampling_df$uniqueID<-as.numeric(sampling_df$uniqueID)
# Panel B: daily unique-fish counts for 2020; dashed line marks baiting start.
B<-ggplot(sampling_df, aes(x=date, y=uniqueID))+
geom_bar(stat="identity", fill=col_year[2])+
geom_vline(aes(xintercept = as.Date("2020-07-03")),
linetype = 4, colour = "black")+
xlab("Sampling Day (2020)")+
ylab("Unique fish detected per day")+
scale_x_date(date_breaks = "1 day", date_labels = "%b %d")+
theme_bw()+
theme(axis.text.x = element_text(angle = 90))
# tiff("S1.tiff", height =10 , width =8.7, units = "cm", compression = "lzw", res = 1200)
multiplot(A, B, cols=1)
# dev.off()
The probability density curve based on the total number of detections across a 24-hour cycle for the baiting period in both 2019 and 2020 in Parley Lake.
# (lubridate is already attached at the top of the file; kept as a no-op.)
library(lubridate)
# Convert detection timestamps to decimal hour-of-day for both years.
all_sites_corn$Year<-2020
# NOTE(review): the 2020 logger apparently wrote 12-hour times with AM/PM
# (%I ... %p) and a leading space, while 2019 used 24-hour %H -- confirm the
# formats against the raw log files.
all_sites_corn$Time<- as.POSIXct(all_sites_corn$Time, format=' %I:%M:%S %p', tz="CDT")
all_sites_corn$Time<-as.numeric(hour(all_sites_corn$Time)+minute(all_sites_corn$Time)/60)
# all_sites$Time<-lubridate::hour(all_sites$Time) + lubridate::minute(all_sites$Time)/60
total_corn$Year<-2019
total_corn$Time<- as.POSIXct(total_corn$Time, format=' %H:%M:%S', tz="CDT")
total_corn$Time<-as.numeric(hour(total_corn$Time)+minute(total_corn$Time)/60)
both_years_baiting<-bind_rows(all_sites_corn, total_corn)
# both_years_baiting$Time<- as.POSIXct(both_years_baiting$Time, format=' %I:%M:%S %p', tz="CDT")
# both_years_baiting$Time<-lubridate::hour(both_years_baiting$Time) + lubridate::minute(both_years_baiting$Time)/60
# both_years_baiting$Time<-as.numeric(both_years_baiting$Time)
# Figure S2: overlaid diel probability-density curves, one per year.
S2<-ggplot(both_years_baiting, aes(Time, fill=as.factor(Year))) +
geom_density(alpha=0.5)+
xlab("Time of day: 24 hour cycle")+
ylab("Probability density")+
scale_x_continuous(breaks=c(0:24), limits=c(0,24))+
labs(fill = "Year")+
scale_fill_manual(values = col_year)+
theme_bw()
# tiff("S2.tiff", height =10 , width =8.7, units = "cm", compression = "lzw", res = 1200)
S2
# dev.off()
# Feeding-bout group sizes and durations from the 2019 Parley GBI data.
events_parley1 <- readRDS("~/Carp/2019/events_parley1.RDS")
gbi_parley1 <- readRDS("~/Carp/2019/gbi_parley1.RDS")
# Group size per bout = row sum of the group-by-individual matrix.
parley_gs1<-data.frame(bout=1:nrow(gbi_parley1), groupsize=rowSums(gbi_parley1))
# Bout duration in seconds, then minutes.
parley_gs1$dur<-events_parley1$End-events_parley1$Start
parley_gs1$dur_min<-parley_gs1$dur/60
head(parley_gs1)
## bout groupsize dur dur_min
## 1 1 2 87 1.4500000
## 2 2 3 60 1.0000000
## 3 3 2 201 3.3500000
## 4 4 3 63 1.0500000
## 5 5 3 150 2.5000000
## 6 6 2 44 0.7333333
# Panel A: distribution of feeding-bout group sizes.
A<-ggplot(parley_gs1, aes(groupsize)) +
geom_histogram(binwidth=1,boundary=-0.5, color="black", fill=col_year[1]) +
scale_x_continuous(breaks=2:16)+
xlab("Number of carp per feeding bout")+
ylab("Number of times observed")+
ggtitle("(A)")+
theme_bw()
# ggtitle("Parley (combined antenna): Average group size")
#histogram of feeding bout duration
# Panel B: distribution of bout durations (log2 x-axis).
B<-ggplot(parley_gs1, aes(dur_min)) +
geom_histogram(binwidth=1,boundary=-0.5, color="black", fill=col_year[1]) +
scale_x_continuous(trans = "log2")+
xlab("Duration of feeding bout (min)")+
ylab("Number of times observed")+
ggtitle("(B)")+
theme_bw()
# ggtitle("Parley (combined antenna): Duration of feeding bout (min)")
# C<-ggplot(parley_gs1, aes(x=dur_min, y=groupsize)) +
# geom_point(size=2, shape=19, alpha=0.6)+
# scale_x_continuous(trans = "log2")+
# xlab("Bout duration (min)")+
# ylab("Group size")+
# ggtitle("(C)")+
# theme_bw()
# # ggtitle("Parley (combined antenna)")
#
#
# multiplot(A,B,C, cols=1)
# Panel C: bout duration by group size (horizontal boxplots, log2 y-axis).
C <- ggplot(parley_gs1,aes(x=as.factor(groupsize),y=dur_min, fill = as.factor(groupsize)))+
# geom_flat_violin(position = position_nudge(x = .2, y = 0),adjust =2)+
geom_boxplot()+
# geom_point(position = position_jitter(width = .15), size = .25, alpha=0.05)+
coord_flip()+theme_cowplot()+
scale_y_continuous(trans = "log2")+
ylab("Bout duration (min)")+
xlab("Group size")+
# ggtitle("(C)")+
theme(legend.position = "none")
C
# multiplot(A,B,C, cols=1)
# Assemble A+B on top, C beneath.
top_row <- plot_grid(A, B) #, labels=c("(A)", "(B)"))
plot_grid(top_row, C, labels=c("", "(C)"), label_size=13, label_fontface = "plain", ncol = 1)
# Summary statistics for bout duration (minutes).
mean(parley_gs1$dur_min)
## [1] 6.131006
median(parley_gs1$dur_min)
## [1] 4.516667
max(parley_gs1$dur_min)
## [1] 238.6667
#2019 baiting period
start_date<-as.POSIXct("2019-07-30 12:00:00 CDT") #rounded to nearest midday
end_date<-as.POSIXct("2019-08-16 12:00:00 CDT")
num_days<-as.numeric(end_date-start_date)
# Helper: histogram of the number of sampling days each fish visited a site.
# nvisit_df: output of calc_nvisit() (one row per fish, column `nvisit`);
# plot_title: panel title; x_lab / y_lab: axis labels ("" suppresses);
# fill_col: bar fill colour (col_year[1] = 2019, col_year[2] = 2020).
# Replaces ten copy-pasted ggplot blocks with identical structure.
visit_hist <- function(nvisit_df, plot_title, x_lab, y_lab, fill_col) {
  ggplot(nvisit_df, aes(nvisit)) +
    geom_histogram(binwidth = 1, boundary = -0.5, color = "black", fill = fill_col) +
    scale_x_continuous(breaks = 0:20) +
    xlab(x_lab) +
    ylab(y_lab) +
    ggtitle(plot_title) +
    theme_bw()
}
#Histogram of the number of sampling days each fish visits Boat Ramp (Site 1)
superfish_br<-calc_nvisit(df=br_corn, num_days=num_days, start_date=start_date)
br<-visit_hist(superfish_br, "Site 1 (2019)", "", "Number of unique fish", col_year[1])
#Histogram of the number of sampling days each fish visits Crown College (Site 3)
superfish_crown<-calc_nvisit(df=crown_corn, num_days=num_days, start_date=start_date)
cr<-visit_hist(superfish_crown, "Site 3 (2019)", "", "", col_year[1])
#2020 baiting period
start_date<-as.POSIXct("2020-07-03 12:00:00 CDT") #rounded to nearest midday
end_date<-as.POSIXct("2020-07-23 12:00:00 CDT")
num_days<-as.numeric(end_date-start_date)
# Sites 1-8 (2020): y-axis labelled on the left column only, x-axis on the
# bottom row only (matches the 2x5 multiplot layout below).
superfish_s1<-calc_nvisit(df=s1_corn, num_days=num_days, start_date=start_date)
s1<-visit_hist(superfish_s1, "Site 1 (2020)", "", "Number of unique fish", col_year[2])
superfish_s2<-calc_nvisit(df=s2_corn, num_days=num_days, start_date=start_date)
s2<-visit_hist(superfish_s2, "Site 2 (2020)", "", "", col_year[2])
superfish_s3<-calc_nvisit(df=s3_corn, num_days=num_days, start_date=start_date)
s3<-visit_hist(superfish_s3, "Site 3 (2020)", "", "Number of unique fish", col_year[2])
superfish_s4<-calc_nvisit(df=s4_corn, num_days=num_days, start_date=start_date)
s4<-visit_hist(superfish_s4, "Site 4 (2020)", "", "", col_year[2])
superfish_s5<-calc_nvisit(df=s5_corn, num_days=num_days, start_date=start_date)
s5<-visit_hist(superfish_s5, "Site 5 (2020)", "", "Number of unique fish", col_year[2])
superfish_s6<-calc_nvisit(df=s6_corn, num_days=num_days, start_date=start_date)
s6<-visit_hist(superfish_s6, "Site 6 (2020)", "", "", col_year[2])
superfish_s7<-calc_nvisit(df=s7_corn, num_days=num_days, start_date=start_date)
s7<-visit_hist(superfish_s7, "Site 7 (2020)", "Number of sampling days", "Number of unique fish", col_year[2])
superfish_s8<-calc_nvisit(df=s8_corn, num_days=num_days, start_date=start_date)
s8<-visit_hist(superfish_s8, "Site 8 (2020)", "Number of sampling days", "", col_year[2])
multiplot(br, cr, s1, s2, s3, s4, s5, s6, s7, s8, cols=2)
# Superfeeder body-length comparison (2020): join the per-fish visit totals
# onto the capture records (length, sex) by PIT tag.
superfish_wide<-superfish_wide %>% rename(PIT =fishID)
superfeeder_length<-merge(superfish_wide, pit_capture, by="PIT")
superfeeder_length$Length<-as.numeric(superfeeder_length$Length)
# Panel A: length by superfeeder status; blue diamond marks the group mean.
A<- ggplot(superfeeder_length, aes(x=as.factor(top20), y=Length))+
geom_boxplot()+
# `fun` replaces the deprecated `fun.y` argument (ggplot2 >= 3.3.0)
stat_summary(fun=mean, geom="point", shape=23, size=3, colour="blue")+
geom_jitter(shape=16, alpha=0.6, position=position_jitter(0.2))+
# theme(panel.background = element_blank())+
theme_classic()+
xlab("")+
ylab("Length (mm)")+
scale_x_discrete(labels=c("Non-superfeeders", "Superfeeders"))+
ggtitle("(A)")
# Compare body length between superfeeders and the rest.
length_top20<-superfeeder_length$Length[which(superfeeder_length$top20==TRUE)]
length_remaining80<-superfeeder_length$Length[which(superfeeder_length$top20==FALSE)]
mean(length_top20, na.rm=TRUE)
## [1] 697.9545
mean(length_remaining80, na.rm=TRUE)
## [1] 589.4623
sd(length_top20, na.rm=TRUE)
## [1] 55.51103
sd(length_remaining80, na.rm=TRUE)
## [1] 144.7108
# Welch two-sample t-test: superfeeders are significantly longer.
t.test(length_top20, length_remaining80, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: length_top20 and length_remaining80
## t = 5.9045, df = 87.287, p-value = 6.631e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 71.97249 145.01207
## sample estimates:
## mean of x mean of y
## 697.9545 589.4623
# Coerce logical to 0/1 so top20 can be the glm response.
superfeeder_length$top20<-superfeeder_length$top20*1
# Logistic regression: probability of superfeeder status as a function of length.
mylogit <- glm(top20 ~ Length, data = superfeeder_length, family = "binomial")
summary(mylogit)
##
## Call:
## glm(formula = top20 ~ Length, family = "binomial", data = superfeeder_length)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.4977 -0.6965 -0.4560 -0.1200 2.2832
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -9.101919 2.690600 -3.383 0.000717 ***
## Length 0.011429 0.003892 2.937 0.003318 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 117.46 on 127 degrees of freedom
## Residual deviance: 101.13 on 126 degrees of freedom
## (8 observations deleted due to missingness)
## AIC: 105.13
##
## Number of Fisher Scoring iterations: 6
# Odds ratio per mm of length.
exp(mylogit$coefficients["Length"])
## Length
## 1.011495
# Predicted probability curve over the observed length range.
newdat <- data.frame(Length=seq(min(superfeeder_length$Length, na.rm=TRUE), max(superfeeder_length$Length, na.rm=TRUE),len=100))
newdat$top20 = predict(mylogit, newdata=newdat, type="response")
# plot(top20~Length, data=superfeeder_length, col="red4")
# lines(top20 ~ Length, newdat, col="green4", lwd=2)
# Panel B: observed 0/1 outcomes with the fitted logistic curve.
B<-superfeeder_length %>% ggplot()+ geom_point(aes(x=Length, y=top20))+ geom_line(data=newdat, aes(x=Length, y=top20))+ xlab("Length (mm)") + ylab("Probability of being a superfeeder")+ theme_classic()+
ggtitle("(B)")
multiplot(A, B, cols=2)
### Figure S6. Superfeeding results for 2019 (unique daily visits)
#Combined plot showing site preference and number of detected visits by fish
# 2019 has only two monitored sites: Boat Ramp (Site 1) and Crown (Site 3).
superfish_br$Site<-"BR"
superfish_crown$Site<-"Crown"
# NOTE(review): as above, `Fill=NA` rbind-ed as a row yields an all-NA row
# that na.omit() removes; confirm intent.
all_superfish <- rbind(superfish_br,superfish_crown, Fill=NA)
all_superfish<-na.omit(all_superfish)
superfish_wide <- spread(all_superfish, Site, nvisit) #convert from long to wide data format
# superfish_wide <- subset(superfish_wide,select = fishID:Site8)
superfish_wide[is.na(superfish_wide)] <- 0 #convert NAs to zero
# Columns 2:3 are the two site-count columns in 2019.
superfish_wide$total <- rowSums(superfish_wide[2:3])
superfish_wide<-superfish_wide[order(superfish_wide$total, decreasing=TRUE),]
all_superfish$fishID <- factor(all_superfish$fishID, levels = superfish_wide$fishID[order(superfish_wide$total, decreasing=TRUE)])
# Fixed 20% cutoff for 2019 (unlike 2020, which uses the Lorenz-curve rank).
top20<-round(length(levels(all_superfish$fishID))*.2)
cutoff<-levels(all_superfish$fishID)[top20]
# Panel A: per-fish unique-day visits stacked by site; line marks the cutoff.
A<-ggplot(all_superfish, aes(fill=Site, y=nvisit, x=fishID))+
geom_col()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line=element_blank(), axis.text.x=element_blank(), axis.ticks = element_blank())+
scale_fill_manual(name = "Site", labels = c("Site 1", "Site 3"), values=c(col_sites[1], col_sites[3]))+
labs(y= "Visits per unique day", x = "Individual carp")+
geom_vline(xintercept=which(levels(all_superfish$fishID) == cutoff))+
ggtitle("(A)")
A
levels(all_superfish$fishID)
## [1] "775172" "497631" "497877" "775213" "038365" "497693" "497849" "038301"
## [9] "038315" "497622" "497630" "775180" "038368" "038369" "497601" "497639"
## [17] "497661" "497675" "497690" "497887" "775170" "775173" "775186" "038335"
## [25] "038381" "038388" "497600" "497641" "497870" "775217" "775219" "775241"
## [33] "038387" "038393" "497628" "497636" "497649" "497771" "497871" "775194"
## [41] "775203" "775204" "775215" "775239" "038355" "497766" "775234" "497627"
## [49] "038334" "038344" "497753" "497865" "497875" "038377" "497822" "497891"
## [57] "775177" "497827" "038322" "038363" "497656" "497811" "497876" "038361"
## [65] "497684" "497778" "497841" "497855" "497680" "497764" "775235" "038391"
## [73] "497805" "497836" "497892" "775199" "038325" "497663" "038305" "038313"
## [81] "038326" "038339" "497626" "497673" "497804" "497806" "775216" "038318"
## [89] "038324" "038347" "038349" "038351" "038370" "038382" "038385" "038390"
## [97] "497604" "497620" "497632" "497646" "497652" "497686" "497692" "497867"
## [105] "497874" "775169" "775249"
# Superfeeder subset for the inset.
top20_ID<-levels(all_superfish$fishID)[1:top20]
top20_superfish<-all_superfish[which(all_superfish$fishID %in% top20_ID),]
# Inset: zoomed-in bars for the superfeeders only (no legend).
inset.plot<- ggplot(top20_superfish, aes(fill=Site, y=nvisit, x=fishID))+
geom_col()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.ticks = element_blank(), axis.text.x = element_text(size=6, angle = 90, hjust = 1), legend.position = "none")+
scale_fill_manual(name = "Site", labels = c("Site 1", "Site 3"), values=c(col_sites[1], col_sites[3]))+
labs(y= "Visits", x = "")
inset.plot
library(cowplot)
# Overlay the inset on the full bar chart.
plot.with.inset <-
ggdraw() +
draw_plot(A) +
draw_plot(inset.plot, x = 0.4, y = 0.5, width = .45, height = .50)
plot.with.inset
#scatter plot of number of visits vs % of time spent at preferred/dominate/most detected site
superfeeder_bool<-data.frame(fishID=all_superfish$fishID, top20=(all_superfish$fishID %in% top20_ID))
# Site fidelity over the two 2019 sites (columns 2:3).
superfish_wide$max_per_site<- apply(X=superfish_wide[,2:3], MARGIN=1, FUN=max)
superfish_wide$percentage_max<-superfish_wide$max_per_site/superfish_wide$total*100
superfish_wide$top20<-superfeeder_bool$top20[match(superfish_wide$fishID,superfeeder_bool$fishID)]
# Panel B: total visits vs. site fidelity with superfeeders highlighted.
B<- ggplot(superfish_wide, aes(x=total, y=percentage_max))+
geom_point(aes(colour = top20, shape=top20), alpha=0.5)+
# geom_abline(intercept = 0, slope = 100/30)+
xlab("Total number of visits")+
ylab("Proportion of visits at top visited Site")+
ggtitle("(B)")+
# ggtitle("Relative 'Site Fidelity' for Superfeeders")+
geom_text(aes(label = ifelse(top20 == 'TRUE', fishID, NA)), check_overlap= TRUE, colour="black", vjust="inward", hjust="inward")+
scale_shape_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=c(15,17))+
scale_colour_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=col_sfeeder)+
theme_bw()
B
multiplot(plot.with.inset, B, cols=1)
# Year-to-year overlap of superfeeder identities (5 fish in both years).
top20_ID_2019<-top20_ID
sum(top20_ID_2019 %in% top20_ID_2020)
## [1] 5
length(top20_ID_2019)
## [1] 21
sum(top20_ID_2020 %in% top20_ID_2019)
## [1] 5
length(top20_ID_2020)
## [1] 24
top20_ID_2020[which(top20_ID_2020 %in% top20_ID_2019)]
## [1] "775172" "497661" "497690" "497601" "775170"
superfish_wide2019<-superfish_wide
#Gini curve for unique daily visits
lorenz<-all_superfish %>% group_by(fishID) %>% summarize(nvisit=sum(nvisit)) %>% arrange(nvisit) %>% mutate(forage_activity_rank= 1:nrow(.), prop_forager= forage_activity_rank/nrow(.), cum_visits=cumsum(nvisit)/sum(nvisit), one_to_one= prop_forager)
fwrite(lorenz, "/research-home/lwhite/Carp/Superfeeder_rankings/daily_visits2019.csv")
#Gini coefficient calculation
areaB<-AUC(x=append(0, lorenz$prop_forager), y=append(0, lorenz$cum_visits), method="trapezoid")
areaA<-0.5-areaB
(gini_coeff<-areaA/(areaA+areaB))
## [1] 0.3849874
lorenzC<-lorenz %>% ggplot()+geom_line(aes(x=prop_forager, y=cum_visits))+ geom_line(aes(x=prop_forager, y=one_to_one), lty="dashed")+ theme_bw()+
geom_vline(xintercept = 0.8 , lty="dotted", col="dark red") +
geom_hline(yintercept = 0.62 , lty="dotted", col="dark red") +
geom_vline(xintercept = 0.5 , lty="dotted", col="dark blue") +
geom_hline(yintercept = 0.2 , lty="dotted", col="dark blue") +
xlab("Cumulative Fraction of Population Based on Foraging Ranking")+ ylab("Cumulative Number of\nUnique Daily Visits")+ scale_x_continuous(limits = c(0,1))+ scale_y_continuous(limits = c(0,1))+ ggtitle(paste("Lorenz Curve: 2019 Unique Daily Visits\n(Gini Coefficient:", round(gini_coeff,3), ")"))
## Gini curve for raw total counts
# Per-site raw detection counts (every antenna read, not unique daily visits).
br_raw_count <-br_corn %>% group_by(PIT) %>% summarise(count = n()) %>% arrange(count)
crown_raw_count <-crown_corn %>% group_by(PIT) %>% summarise(count = n()) %>% arrange(count)
# Combine both sites into one total per fish, ranked ascending.
raw_count2019<-bind_rows(br_raw_count, crown_raw_count) %>% group_by(PIT) %>% summarize(count=sum(count))%>% arrange(count)
# FIX: row_number()/n() replace the 1:nrow(.) anti-pattern (see the
# daily-visit curve above); results are identical.
lorenz<-raw_count2019 %>% mutate(forage_activity_rank= row_number(), prop_forager= forage_activity_rank/n(), cum_visits=cumsum(count)/sum(count), one_to_one= prop_forager)
fwrite(lorenz, "/research-home/lwhite/Carp/Superfeeder_rankings/total_detections2019.csv")
#Gini coefficient calculation
# c(0, ...) prepends the (0, 0) origin (replaces append()).
areaB<-AUC(x=c(0, lorenz$prop_forager), y=c(0, lorenz$cum_visits), method="trapezoid")
areaA<-0.5-areaB
(gini_coeff<-areaA/(areaA+areaB))
## [1] 0.6634699
# Lorenz curve for total detections; guides as in the daily-visit plot.
lorenzD<-lorenz %>% ggplot()+geom_line(aes(x=prop_forager, y=cum_visits))+ geom_line(aes(x=prop_forager, y=one_to_one), lty="dashed")+ theme_bw()+
geom_vline(xintercept = 0.8 , lty="dotted", col="dark red") +
geom_hline(yintercept = 0.34 , lty="dotted", col="dark red") +
geom_vline(xintercept = 0.71 , lty="dotted", col="dark blue") +
geom_hline(yintercept = 0.2 , lty="dotted", col="dark blue") +
xlab("Cumulative Fraction of Population Based on Foraging Ranking")+ ylab("Cumulative Number\nof Total Detections")+ scale_x_continuous(limits = c(0,1))+ scale_y_continuous(limits = c(0,1))+ ggtitle(paste("Lorenz Curve: 2019 Total Visits\n(Gini Coefficient:", round(gini_coeff,3), ")"))
# Assemble the four Lorenz panels (lorenzA/lorenzB defined earlier in file).
plot_grid(lorenzA, lorenzB, lorenzC, lorenzD, ncol=2, labels="AUTO")
### Figure S6.v2 Superfeeding results for 2019 (Total cumulative visits)
## Gini curve for raw total counts
# Recompute per-site counts, now tagged with a Site column ("1" = Boat
# Ramp, "3" = Crown) for the stacked bar chart built below.
br_raw_count <-br_corn %>% group_by(PIT) %>% summarise(count = n()) %>% arrange(count) %>% mutate(Site="1")
crown_raw_count <-crown_corn %>% group_by(PIT) %>% summarise(count = n()) %>% arrange(count) %>% mutate(Site="3")
# Per-fish totals (Site dropped by summarize); identical to raw_count2019
# computed for the previous figure.
raw_count2019<-bind_rows(br_raw_count, crown_raw_count) %>% group_by(PIT) %>% summarize(count=sum(count))%>% arrange(count)
# FIX: row_number()/n() replace the 1:nrow(.) anti-pattern. This lorenz
# table is reused below to choose the superfeeder cutoff fish.
lorenz<-raw_count2019 %>% mutate(forage_activity_rank= row_number(), prop_forager= forage_activity_rank/n(), cum_visits=cumsum(count)/sum(count), one_to_one= prop_forager)
#Gini coefficient calculation
areaB<-AUC(x=c(0, lorenz$prop_forager), y=c(0, lorenz$cum_visits), method="trapezoid")
areaA<-0.5-areaB
(gini_coeff<-areaA/(areaA+areaB))
## [1] 0.6634699
# Panel (A): same Lorenz curve as before, retitled for the figure layout.
lorenzD<-lorenz %>% ggplot()+geom_line(aes(x=prop_forager, y=cum_visits))+ geom_line(aes(x=prop_forager, y=one_to_one), lty="dashed")+ theme_bw()+
geom_vline(xintercept = 0.8 , lty="dotted", col="dark red") +
geom_hline(yintercept = 0.34 , lty="dotted", col="dark red") +
geom_vline(xintercept = 0.71 , lty="dotted", col="dark blue") +
geom_hline(yintercept = 0.2 , lty="dotted", col="dark blue") +
xlab("Cumulative Fraction of Population Based on Foraging Ranking")+ ylab("Cumulative Number\nof Total Detections")+ scale_x_continuous(limits = c(0,1))+ scale_y_continuous(limits = c(0,1))+ ggtitle("(A)")
# + ggtitle(paste("Lorenz Curve: 2019 Total Visits\n(Gini Coefficient:", round(gini_coeff,3), ")"))
#Plot total visits as a summation of visits across sites by FishID
# Long table: one row per (fish, site) with its raw detection count.
# NOTE(review): arrange(count) runs on data grouped by (PIT, Site);
# dplyr::arrange ignores groups unless .by_group = TRUE, so this sorts the
# whole table rather than within groups -- presumably intended; confirm.
all_superfish <- bind_rows(br_raw_count, crown_raw_count) %>% group_by(PIT, Site) %>% arrange(count) %>% na.omit() %>% rename(fishID=PIT, nvisit=count)
superfish_wide <- spread(all_superfish, Site, nvisit) #convert from long to wide data format
# superfish_wide <- subset(superfish_wide,select = fishID:Site8)
superfish_wide[is.na(superfish_wide)] <- 0 #convert NAs to zero
# Columns 2:3 are the Site "1" and Site "3" count columns created by
# spread(); downstream code relies on this positional layout.
superfish_wide$total <- rowSums(superfish_wide[2:3])
superfish_wide<-superfish_wide[order(superfish_wide$total, decreasing=TRUE),]
# superfish_wide$short_lab<-LETTERS[seq( from = 1, to = nrow(superfish_wide))]
# Order factor levels by total visits so the bar chart reads most-to-least
# active, left to right.
all_superfish$fishID <- factor(all_superfish$fishID, levels = superfish_wide$fishID[order(superfish_wide$total, decreasing=TRUE)])
# top20<-round(length(levels(all_superfish$fishID))*.18)
# Superfeeder cutoff: first fish past 71% of the population on the
# total-detections Lorenz curve (lorenz is ranked ascending by count).
top20<-which(lorenz$prop_forager>0.71)[1]
# cutoff<-levels(all_superfish$fishID)[top20]
cutoff<-lorenz$PIT[top20]
# Panel (B): stacked per-fish detections colored by site; the vertical
# line marks the superfeeder cutoff fish in the ranked ordering.
B<-ggplot(all_superfish, aes(fill=Site, y=nvisit, x=fishID))+
geom_col()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank(), axis.line=element_blank(), axis.text.x=element_blank(), axis.ticks = element_blank())+
# scale_y_continuous(trans = "log2")+
scale_fill_manual(name = "Site", labels = c("Site 1", "Site 3"), values=c(col_sites[1], col_sites[3]))+
labs(y= "Total cumulative detections", x = "Individual carp")+
geom_vline(xintercept=which(levels(all_superfish$fishID) == cutoff))+
ggtitle("(B)")
B
# Inspect the ranked fish IDs (most to least detected in 2019).
levels(all_superfish$fishID)
## [1] "775194" "497630" "497600" "497877" "497601" "497641" "497849" "497631"
## [9] "775217" "497661" "775172" "775173" "497639" "497693" "038301" "497871"
## [17] "775241" "038381" "775215" "038369" "038393" "775239" "038315" "038335"
## [25] "038355" "775213" "497627" "775170" "497690" "775234" "775186" "497822"
## [33] "497771" "038368" "775203" "775180" "497622" "038387" "497764" "775219"
## [41] "497891" "497636" "497887" "038388" "497870" "497841" "775204" "775235"
## [49] "497855" "038365" "038344" "497649" "038313" "775177" "497827" "497766"
## [57] "497875" "497876" "038377" "497753" "497865" "497684" "497656" "038322"
## [65] "775199" "497628" "038339" "038363" "497805" "497675" "497778" "038334"
## [73] "038361" "038391" "497811" "038325" "497626" "497680" "497836" "497804"
## [81] "497620" "775216" "497806" "497892" "497646" "038326" "038351" "038305"
## [89] "497663" "497673" "038349" "497874" "497692" "038318" "038347" "497604"
## [97] "497632" "497686" "497867" "775169" "038324" "038370" "038382" "038385"
## [105] "038390" "497652" "775249"
# Superfeeder set and inset bar chart ----
# Fish are ranked by total visits via the factor levels set above; the
# superfeeder set is every level up to and including the Lorenz cutoff.
fish_ranks <- levels(all_superfish$fishID)
top20_ID<-fish_ranks[seq_len(match(cutoff, fish_ranks))]
top20_superfish<-all_superfish[all_superfish$fishID %in% top20_ID, ]
# Inset: superfeeders only, with readable (rotated) ID labels.
inset.plot<- ggplot(top20_superfish, aes(x = fishID, y = nvisit, fill = Site)) +
  geom_col() +
  scale_fill_manual(name = "Site", labels = c("Site 1", "Site 3"),
                    values = c(col_sites[1], col_sites[3])) +
  labs(x = "", y = "Visits") +
  theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        axis.text.x = element_text(size = 6, angle = 90, hjust = 1),
        legend.position = "none")
inset.plot
# cowplot is already attached at the top of the script; this repeated
# library() call is a harmless no-op kept for parity with the original.
library(cowplot)
# Overlay the inset on the full-population bar chart (plot B).
plot.with.inset <- ggdraw() +
  draw_plot(B) +
  draw_plot(inset.plot, x = 0.3, y = .45, width = .55, height = .55)
plot.with.inset
#scatter plot of number of visits vs % of time spent at preferred/dominate/most detected site
# TRUE for fish in the superfeeder (top ~20%) set.
superfeeder_bool<-data.frame(fishID=all_superfish$fishID, top20=(all_superfish$fishID %in% top20_ID))
# Largest single-site count per fish. FIX: pmax() on the two site columns
# replaces apply(MARGIN = 1, FUN = max), which coerced the data frame to a
# matrix; results are identical for these numeric columns.
superfish_wide$max_per_site<- pmax(superfish_wide[[2]], superfish_wide[[3]])
# Site fidelity: percent of a fish's total detections at its top site.
superfish_wide$percentage_max<-superfish_wide$max_per_site/superfish_wide$total*100
superfish_wide$top20<-superfeeder_bool$top20[match(superfish_wide$fishID,superfeeder_bool$fishID)]
# Panel (C): total visits vs site fidelity, superfeeders highlighted.
C<- ggplot(superfish_wide, aes(x=total, y=percentage_max))+
geom_point(aes(colour = top20, shape=top20), alpha=0.5)+
# geom_abline(intercept = 0, slope = 100/30)+
xlab("Total number of visits")+
ylab("Proportion of visits at top \nvisited Site")+
ggtitle("(C)")+
# ggtitle("Relative 'Site Fidelity' for Superfeeders")+
# FIX: top20 is logical, so it is used directly; the original compared it
# to the string 'TRUE', relying on implicit coercion (same result).
geom_text(aes(label = ifelse(top20, fishID, NA)), check_overlap= TRUE, colour="black", vjust="inward", hjust="inward")+
scale_shape_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=c(15,17))+
scale_colour_manual(name="Individual \ncarp", labels = c("Non-superfeeders", "Superfeeders"), values=col_sfeeder)+
theme_bw()
C
# Save the 2019 superfeeder IDs for cross-year comparison below.
top20_ID_2019<-top20_ID
# multiplot() is a user-supplied helper (not from any package attached
# here); assumed sourced earlier in the project -- TODO confirm.
multiplot(lorenzD, plot.with.inset, C, cols=1)
# Compare site fidelity (percentage of detections at the most-used site)
# between superfeeders (top20) and the remaining ~80% of fish.
site_fidelity_top20<-superfish_wide %>% filter(top20==TRUE) %>% select(percentage_max)
site_fidelity_remaining80 <-superfish_wide %>% filter(top20==FALSE) %>% select(percentage_max)
# FIX: na.rm = TRUE added consistently to all four summaries below. The
# original mixed na.rm (one mean had it, neither sd did), so a single NA
# would have produced NA summaries; values are unchanged when no NAs exist.
mean(site_fidelity_top20$percentage_max, na.rm=TRUE)
## [1] 96.66726
mean(site_fidelity_remaining80$percentage_max, na.rm=TRUE)
## [1] 95.58625
sd(site_fidelity_top20$percentage_max, na.rm=TRUE)
## [1] 7.935694
sd(site_fidelity_remaining80$percentage_max, na.rm=TRUE)
## [1] 9.951133
# Welch two-sample t-test (t.test drops NAs itself): no significant
# difference in site fidelity between groups (p = 0.55).
t.test(site_fidelity_top20$percentage_max, site_fidelity_remaining80$percentage_max, var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: site_fidelity_top20$percentage_max and site_fidelity_remaining80$percentage_max
## t = 0.59613, df = 72.819, p-value = 0.5529
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.533189 4.695196
## sample estimates:
## mean of x mean of y
## 96.66726 95.58625
# Re-snapshot the 2019 wide table now that top20/fidelity columns exist.
superfish_wide2019<-superfish_wide
#2020
# Boxplots of first-detection date vs superfeeder status, by year.
# NOTE(review): the column is spelled DATEIME throughout (not DATETIME) --
# presumably this is the actual column name in first_detect2020 and
# first_detect_2019; confirm before "correcting" the spelling.
A <-first_detect2020 %>% mutate(time_since= difftime(DATEIME, min(first_detect2020$DATEIME), units="days")) %>% left_join(superfish_wide2020 %>% select(fishID, top20) %>% rename(PIT=fishID), by="PIT") %>% ggplot(aes(x=DATEIME, y=as.factor(top20)))+ geom_point(alpha=0.5)+ geom_boxplot()+ theme_classic()+xlab("Date (2020)")+ylab("Superfeeder") + ggtitle("(A)")
#2019
# breaks.index <- seq.POSIXt(from=as.POSIXct(min(first_detect$DATEIME), format="%Y-%m-%d"), to=as.POSIXct(min(first_detect$DATEIME), format="%Y-%m-%d"), by="1 week")
# Same plot for 2019; top20 flag joined in from the 2019 wide table.
B<-first_detect_2019 %>% left_join(superfish_wide2019 %>% select(fishID, top20) %>% rename(PIT=fishID), by="PIT") %>% #mutate(DATETIME=as.numeric(DATEIME))%>%
ggplot(aes(x=DATEIME, y=as.factor(top20)))+ geom_point(alpha=0.5)+ geom_boxplot()+
theme_classic()+xlab("Date (2019)")+ ylab("Superfeeder") + ggtitle("(B)") #+ scale_x_datetime(breaks = breaks.index, labels = format(breaks.index, "%Y-%m-%d"))
# Stack the two yearly panels (multiplot is a user-supplied helper).
multiplot(A, B, cols=1)
# Median first-detection timing, superfeeders (SF) vs non-superfeeders
# (NSF), for each year. time_since = days between a fish's first detection
# and the earliest detection overall that year.
# NOTE(review): DATEIME spelling assumed to be the real column name in
# first_detect2020 / first_detect_2019 -- confirm.
detection_date_2020_SF<-first_detect2020 %>% mutate(time_since= difftime(DATEIME, min(first_detect2020$DATEIME), units="days")) %>% left_join(superfish_wide2020 %>% select(fishID, top20) %>% rename(PIT=fishID), by="PIT") %>% filter(top20==TRUE)
median(detection_date_2020_SF$DATEIME)
## [1] "2020-07-04 03:51:25 CDT"
median(detection_date_2020_SF$time_since)
## Time difference of 0.5759028 days
detection_date_2020_NSF<-first_detect2020 %>% mutate(time_since= difftime(DATEIME, min(first_detect2020$DATEIME), units="days")) %>% left_join(superfish_wide2020 %>% select(fishID, top20) %>% rename(PIT=fishID), by="PIT") %>% filter(top20==FALSE)
median(detection_date_2020_NSF$DATEIME, na.rm=TRUE)
## [1] "2020-07-06 04:08:57 CDT"
median(detection_date_2020_NSF$time_since, na.rm=TRUE)
## Time difference of 2.588079 days
# 2019: superfeeders were first detected ~1.4 days earlier (median) than
# non-superfeeders, mirroring the 2020 pattern above.
detection_date_2019_SF<-first_detect_2019 %>% mutate(time_since= difftime(DATEIME, min(first_detect_2019$DATEIME), units="days")) %>% left_join(superfish_wide2019 %>% select(fishID, top20) %>% rename(PIT=fishID), by="PIT") %>% filter(top20==TRUE)
median(detection_date_2019_SF$DATEIME)
## [1] "2019-08-01 18:08:50 CDT"
median(detection_date_2019_SF$time_since)
## Time difference of 1.793293 days
detection_date_2019_NSF<-first_detect_2019 %>% mutate(time_since= difftime(DATEIME, min(first_detect_2019$DATEIME), units="days"))%>% left_join(superfish_wide2019 %>% select(fishID, top20) %>% rename(PIT=fishID), by="PIT") %>% filter(top20==FALSE)
median(detection_date_2019_NSF$DATEIME, na.rm=TRUE)
## [1] "2019-08-03 03:16:09 CDT"
median(detection_date_2019_NSF$time_since, na.rm=TRUE)
## Time difference of 3.173368 days
# Superfeeder ID lists by year and the fraction shared across years.
top20_ID_2019
## [1] "775194" "497630" "497600" "497877" "497601" "497641" "497849" "497631"
## [9] "775217" "497661" "775172" "775173" "497639" "497693" "038301" "497871"
## [17] "775241" "038381" "775215" "038369" "038393" "775239" "038315" "038335"
## [25] "038355" "775213" "497627" "775170" "497690" "775234" "775186" "497822"
length(top20_ID_2019)
## [1] 32
top20_ID_2020
## [1] "38397" "775194" "497673" "497890" "497862" "497637" "497649" "497645"
## [9] "497672" "497766" "497666" "497815" "775172" "775215" "497661" "497690"
## [17] "38320" "497871" "775204" "497601" "775170" "497896" "497608" "775217"
length(top20_ID_2020)
## [1] 24
# Overlap fractions: sum(A %in% B) counts the intersection size (9 fish)
# regardless of direction, since both ID lists are unique.
# NOTE(review): 2019 IDs keep a leading zero ("038301") while 2020 IDs do
# not ("38397"), so 038xxx tags can never match across years -- verify the
# PIT parsing is consistent before interpreting the overlap as biological.
sum(top20_ID_2020 %in% top20_ID_2019)/length(top20_ID_2019)
## [1] 0.28125
sum(top20_ID_2019 %in% top20_ID_2020)/length(top20_ID_2020)
## [1] 0.375
# Shared superfeeder IDs across years.
# BUG FIX: each vector must be subset by its OWN membership mask. The
# original indexed top20_ID_2019 (length 32) with the logical mask
# top20_ID_2020 %in% top20_ID_2019 (length 24), and vice versa. R recycles
# a shorter logical index, so the original returned arbitrary elements
# plus trailing NAs -- exactly the NAs visible in its output, preserved
# below for reference. Both corrected expressions now print the true
# intersection (equivalent to intersect() for these unique ID vectors).
top20_ID_2019[top20_ID_2019 %in% top20_ID_2020]
## Original (incorrect, recycled-mask) output was:
## [1] "497630" "497639" "497693" "038301" "497871" "038381" "038369" "038393"
## [9] "038335" "775213"
top20_ID_2020[top20_ID_2020 %in% top20_ID_2019]
## Original (incorrect, recycled-mask) output was:
## [1] "38397" "497862" "497672" "497766" "497666" "497690" "775204" NA
## [9] NA